lib/Target/PowerPC/PPCISelLowering.cpp

   1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements the PPCISelLowering class.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "PPCISelLowering.h"
  15 #include "MCTargetDesc/PPCPredicates.h"
  16 #include "PPCMachineFunctionInfo.h"
  17 #include "PPCPerfectShuffle.h"
  18 #include "PPCTargetMachine.h"
  19 #include "PPCTargetObjectFile.h"
  20 #include "llvm/ADT/STLExtras.h"
  21 #include "llvm/ADT/StringSwitch.h"
  22 #include "llvm/ADT/Triple.h"
  23 #include "llvm/CodeGen/CallingConvLower.h"
  24 #include "llvm/CodeGen/MachineFrameInfo.h"
  25 #include "llvm/CodeGen/MachineFunction.h"
  26 #include "llvm/CodeGen/MachineInstrBuilder.h"
  27 #include "llvm/CodeGen/MachineRegisterInfo.h"
  28 #include "llvm/CodeGen/SelectionDAG.h"
  29 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
  30 #include "llvm/IR/CallingConv.h"
  31 #include "llvm/IR/Constants.h"
  32 #include "llvm/IR/DerivedTypes.h"
  33 #include "llvm/IR/Function.h"
  34 #include "llvm/IR/Intrinsics.h"
  35 #include "llvm/Support/CommandLine.h"
  36 #include "llvm/Support/ErrorHandling.h"
  37 #include "llvm/Support/MathExtras.h"
  38 #include "llvm/Support/raw_ostream.h"
  39 #include "llvm/Target/TargetOptions.h"
  40 using namespace llvm;
  41
  42 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
  43 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
  44
  45 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
  46 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
  47
  48 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
  49 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
  50
  51 // FIXME: Remove this once the bug has been fixed!
  52 extern cl::opt<bool> ANDIGlueBug;
  53
  54 static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
  55   // If it isn't a Mach-O file then it's going to be a linux ELF
  56   // object file.
  57   if (TT.isOSDarwin())
  58     return new TargetLoweringObjectFileMachO();
  59
  60   return new PPC64LinuxTargetObjectFile();
  61 }
  62
  63 PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
  64     : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))),
  65       Subtarget(*TM.getSubtargetImpl()) {
  66   setPow2DivIsCheap();
  67
  68   // Use _setjmp/_longjmp instead of setjmp/longjmp.
  69   setUseUnderscoreSetJmp(true);
  70   setUseUnderscoreLongJmp(true);
  71
  72   // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  73   // arguments are at least 4/8 bytes aligned.
  74   bool isPPC64 = Subtarget.isPPC64();
  75   setMinStackArgumentAlignment(isPPC64 ? 8:4);
  76
  77   // Set up the register classes.
  78   addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  79   addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
  80   addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
  81
  82   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
  83   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  84   setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
  85
  86   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  87
  88   // PowerPC has pre-inc load and store's.
  89   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  90   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  91   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  92   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  93   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  94   setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  95   setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  96   setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  97   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  98   setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  99
 100   if (Subtarget.useCRBits()) {
 101     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 102
 103     if (isPPC64 || Subtarget.hasFPCVT()) {
 104       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
 105       AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
 106                          isPPC64 ? MVT::i64 : MVT::i32);
 107       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
 108       AddPromotedToType (ISD::UINT_TO_FP, MVT::i1,
 109                          isPPC64 ? MVT::i64 : MVT::i32);
 110     } else {
 111       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
 112       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
 113     }
 114
 115     // PowerPC does not support direct load / store of condition registers
 116     setOperationAction(ISD::LOAD, MVT::i1, Custom);
 117     setOperationAction(ISD::STORE, MVT::i1, Custom);
 118
 119     // FIXME: Remove this once the ANDI glue bug is fixed:
 120     if (ANDIGlueBug)
 121       setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
 122
 123     setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
 124     setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
 125     setTruncStoreAction(MVT::i64, MVT::i1, Expand);
 126     setTruncStoreAction(MVT::i32, MVT::i1, Expand);
 127     setTruncStoreAction(MVT::i16, MVT::i1, Expand);
 128     setTruncStoreAction(MVT::i8, MVT::i1, Expand);
 129
 130     addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
 131   }
 132
 133   // This is used in the ppcf128->int sequence.  Note it has different semantics
 134   // from FP_ROUND:  that rounds to nearest, this rounds to zero.
 135   setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
 136
 137   // We do not currently implement these libm ops for PowerPC.
 138   setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
 139   setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
 140   setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
 141   setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
 142   setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
 143   setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
 144
 145   // PowerPC has no SREM/UREM instructions
 146   setOperationAction(ISD::SREM, MVT::i32, Expand);
 147   setOperationAction(ISD::UREM, MVT::i32, Expand);
 148   setOperationAction(ISD::SREM, MVT::i64, Expand);
 149   setOperationAction(ISD::UREM, MVT::i64, Expand);
 150
 151   // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
 152   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
 153   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
 154   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
 155   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
 156   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
 157   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
 158   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
 159   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
 160
 161   // We don't support sin/cos/sqrt/fmod/pow
 162   setOperationAction(ISD::FSIN , MVT::f64, Expand);
 163   setOperationAction(ISD::FCOS , MVT::f64, Expand);
 164   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
 165   setOperationAction(ISD::FREM , MVT::f64, Expand);
 166   setOperationAction(ISD::FPOW , MVT::f64, Expand);
 167   setOperationAction(ISD::FMA  , MVT::f64, Legal);
 168   setOperationAction(ISD::FSIN , MVT::f32, Expand);
 169   setOperationAction(ISD::FCOS , MVT::f32, Expand);
 170   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
 171   setOperationAction(ISD::FREM , MVT::f32, Expand);
 172   setOperationAction(ISD::FPOW , MVT::f32, Expand);
 173   setOperationAction(ISD::FMA  , MVT::f32, Legal);
 174
 175   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
 176
 177   // If we're enabling GP optimizations, use hardware square root
 178   if (!Subtarget.hasFSQRT() &&
 179       !(TM.Options.UnsafeFPMath &&
 180         Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
 181     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
 182
 183   if (!Subtarget.hasFSQRT() &&
 184       !(TM.Options.UnsafeFPMath &&
 185         Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
 186     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
 187
 188   if (Subtarget.hasFCPSGN()) {
 189     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
 190     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
 191   } else {
 192     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
 193     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
 194   }
 195
 196   if (Subtarget.hasFPRND()) {
 197     setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
 198     setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
 199     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
 200     setOperationAction(ISD::FROUND, MVT::f64, Legal);
 201
 202     setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
 203     setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
 204     setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
 205     setOperationAction(ISD::FROUND, MVT::f32, Legal);
 206   }
 207
 208   // PowerPC does not have BSWAP, CTPOP or CTTZ
 209   setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
 210   setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
 211   setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
 212   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
 213   setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
 214   setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
 215   setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
 216   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
 217
 218   if (Subtarget.hasPOPCNTD()) {
 219     setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
 220     setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
 221   } else {
 222     setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
 223     setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
 224   }
 225
 226   // PowerPC does not have ROTR
 227   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
 228   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
 229
 230   if (!Subtarget.useCRBits()) {
 231     // PowerPC does not have Select
 232     setOperationAction(ISD::SELECT, MVT::i32, Expand);
 233     setOperationAction(ISD::SELECT, MVT::i64, Expand);
 234     setOperationAction(ISD::SELECT, MVT::f32, Expand);
 235     setOperationAction(ISD::SELECT, MVT::f64, Expand);
 236   }
 237
 238   // PowerPC wants to turn select_cc of FP into fsel when possible.
 239   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
 240   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
 241
 242   // PowerPC wants to optimize integer setcc a bit
 243   if (!Subtarget.useCRBits())
 244     setOperationAction(ISD::SETCC, MVT::i32, Custom);
 245
 246   // PowerPC does not have BRCOND which requires SetCC
 247   if (!Subtarget.useCRBits())
 248     setOperationAction(ISD::BRCOND, MVT::Other, Expand);
 249
 250   setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
 251
 252   // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
 253   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 254
 255   // PowerPC does not have [U|S]INT_TO_FP
 256   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
 257   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
 258
 259   setOperationAction(ISD::BITCAST, MVT::f32, Expand);
 260   setOperationAction(ISD::BITCAST, MVT::i32, Expand);
 261   setOperationAction(ISD::BITCAST, MVT::i64, Expand);
 262   setOperationAction(ISD::BITCAST, MVT::f64, Expand);
 263
 264   // We cannot sextinreg(i1).  Expand to shifts.
 265   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
 266
 267   // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
 268   // SjLj exception handling but a light-weight setjmp/longjmp replacement to
 269   // support continuation, user-level threading, and etc.. As a result, no
 270   // other SjLj exception interfaces are implemented and please don't build
 271   // your own exception handling based on them.
 272   // LLVM/Clang supports zero-cost DWARF exception handling.
 273   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
 274   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
 275
 276   // We want to legalize GlobalAddress and ConstantPool nodes into the
 277   // appropriate instructions to materialize the address.
 278   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
 279   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
 280   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
 281   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
 282   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
 283   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
 284   setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
 285   setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
 286   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
 287   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
 288
 289   // TRAP is legal.
 290   setOperationAction(ISD::TRAP, MVT::Other, Legal);
 291
 292   // TRAMPOLINE is custom lowered.
 293   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
 294   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
 295
 296   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
 297   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
 298
 299   if (Subtarget.isSVR4ABI()) {
 300     if (isPPC64) {
 301       // VAARG always uses double-word chunks, so promote anything smaller.
 302       setOperationAction(ISD::VAARG, MVT::i1, Promote);
 303       AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
 304       setOperationAction(ISD::VAARG, MVT::i8, Promote);
 305       AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
 306       setOperationAction(ISD::VAARG, MVT::i16, Promote);
 307       AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
 308       setOperationAction(ISD::VAARG, MVT::i32, Promote);
 309       AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
 310       setOperationAction(ISD::VAARG, MVT::Other, Expand);
 311     } else {
 312       // VAARG is custom lowered with the 32-bit SVR4 ABI.
 313       setOperationAction(ISD::VAARG, MVT::Other, Custom);
 314       setOperationAction(ISD::VAARG, MVT::i64, Custom);
 315     }
 316   } else
 317     setOperationAction(ISD::VAARG, MVT::Other, Expand);
 318
 319   if (Subtarget.isSVR4ABI() && !isPPC64)
 320     // VACOPY is custom lowered with the 32-bit SVR4 ABI.
 321     setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
 322   else
 323     setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
 324
 325   // Use the default implementation.
 326   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
 327   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
 328   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
 329   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
 330   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
 331
 332   // We want to custom lower some of our intrinsics.
 333   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 334
 335   // To handle counter-based loop conditions.
 336   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
 337
 338   // Comparisons that require checking two conditions.
 339   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
 340   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
 341   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
 342   setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
 343   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
 344   setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
 345   setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
 346   setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
 347   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
 348   setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
 349   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
 350   setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
 351
 352   if (Subtarget.has64BitSupport()) {
 353     // They also have instructions for converting between i64 and fp.
 354     setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 355     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
 356     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 357     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
 358     // This is just the low 32 bits of a (signed) fp->i64 conversion.
 359     // We cannot do this with Promote because i64 is not a legal type.
 360     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 361
 362     if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
 363       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 364   } else {
 365     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
 366     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
 367   }
 368
 369   // With the instructions enabled under FPCVT, we can do everything.
 370   if (Subtarget.hasFPCVT()) {
 371     if (Subtarget.has64BitSupport()) {
 372       setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
 373       setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
 374       setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
 375       setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
 376     }
 377
 378     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
 379     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 380     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
 381     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
 382   }
 383
 384   if (Subtarget.use64BitRegs()) {
 385     // 64-bit PowerPC implementations can support i64 types directly
 386     addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
 387     // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
 388     setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
 389     // 64-bit PowerPC wants to expand i128 shifts itself.
 390     setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
 391     setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
 392     setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
 393   } else {
 394     // 32-bit PowerPC wants to expand i64 shifts itself.
 395     setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
 396     setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
 397     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
 398   }
 399
 400   if (Subtarget.hasAltivec()) {
 401     // First set operation action for all vector types to expand. Then we
 402     // will selectively turn on ones that can be effectively codegen'd.
 403     for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 404          i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
 405       MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
 406
 407       // add/sub are legal for all supported vector VT's.
 408       setOperationAction(ISD::ADD , VT, Legal);
 409       setOperationAction(ISD::SUB , VT, Legal);
 410
 411       // We promote all shuffles to v16i8.
 412       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
 413       AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
 414
 415       // We promote all non-typed operations to v4i32.
 416       setOperationAction(ISD::AND   , VT, Promote);
 417       AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
 418       setOperationAction(ISD::OR    , VT, Promote);
 419       AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
 420       setOperationAction(ISD::XOR   , VT, Promote);
 421       AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
 422       setOperationAction(ISD::LOAD  , VT, Promote);
 423       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
 424       setOperationAction(ISD::SELECT, VT, Promote);
 425       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
 426       setOperationAction(ISD::STORE, VT, Promote);
 427       AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
 428
 429       // No other operations are legal.
 430       setOperationAction(ISD::MUL , VT, Expand);
 431       setOperationAction(ISD::SDIV, VT, Expand);
 432       setOperationAction(ISD::SREM, VT, Expand);
 433       setOperationAction(ISD::UDIV, VT, Expand);
 434       setOperationAction(ISD::UREM, VT, Expand);
 435       setOperationAction(ISD::FDIV, VT, Expand);
 436       setOperationAction(ISD::FREM, VT, Expand);
 437       setOperationAction(ISD::FNEG, VT, Expand);
 438       setOperationAction(ISD::FSQRT, VT, Expand);
 439       setOperationAction(ISD::FLOG, VT, Expand);
 440       setOperationAction(ISD::FLOG10, VT, Expand);
 441       setOperationAction(ISD::FLOG2, VT, Expand);
 442       setOperationAction(ISD::FEXP, VT, Expand);
 443       setOperationAction(ISD::FEXP2, VT, Expand);
 444       setOperationAction(ISD::FSIN, VT, Expand);
 445       setOperationAction(ISD::FCOS, VT, Expand);
 446       setOperationAction(ISD::FABS, VT, Expand);
 447       setOperationAction(ISD::FPOWI, VT, Expand);
 448       setOperationAction(ISD::FFLOOR, VT, Expand);
 449       setOperationAction(ISD::FCEIL,  VT, Expand);
 450       setOperationAction(ISD::FTRUNC, VT, Expand);
 451       setOperationAction(ISD::FRINT,  VT, Expand);
 452       setOperationAction(ISD::FNEARBYINT, VT, Expand);
 453       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
 454       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
 455       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
 456       setOperationAction(ISD::MULHU, VT, Expand);
 457       setOperationAction(ISD::MULHS, VT, Expand);
 458       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 459       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 460       setOperationAction(ISD::UDIVREM, VT, Expand);
 461       setOperationAction(ISD::SDIVREM, VT, Expand);
 462       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
 463       setOperationAction(ISD::FPOW, VT, Expand);
 464       setOperationAction(ISD::BSWAP, VT, Expand);
 465       setOperationAction(ISD::CTPOP, VT, Expand);
 466       setOperationAction(ISD::CTLZ, VT, Expand);
 467       setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
 468       setOperationAction(ISD::CTTZ, VT, Expand);
 469       setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
 470       setOperationAction(ISD::VSELECT, VT, Expand);
 471       setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
 472
 473       for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
 474            j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
 475         MVT::SimpleValueType InnerVT = (MVT::SimpleValueType)j;
 476         setTruncStoreAction(VT, InnerVT, Expand);
 477       }
 478       setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
 479       setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
 480       setLoadExtAction(ISD::EXTLOAD, VT, Expand);
 481     }
 482
 483     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
 484     // with merges, splats, etc.
 485     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
 486
 487     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
 488     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
 489     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
 490     setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
 491     setOperationAction(ISD::SELECT, MVT::v4i32,
 492                        Subtarget.useCRBits() ? Legal : Expand);
 493     setOperationAction(ISD::STORE , MVT::v4i32, Legal);
 494     setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
 495     setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
 496     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
 497     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
 498     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
 499     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
 500     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
 501     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
 502
 503     addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
 504     addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
 505     addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
 506     addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
 507
 508     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
 509     setOperationAction(ISD::FMA, MVT::v4f32, Legal);
 510
 511     if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
 512       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
 513       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
 514     }
 515
 516     setOperationAction(ISD::MUL, MVT::v4i32, Custom);
 517     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
 518     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
 519
 520     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
 521     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
 522
 523     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
 524     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
 525     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
 526     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
 527
 528     // Altivec does not contain unordered floating-point compare instructions
 529     setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
 530     setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
 531     setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
 532     setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
 533
 534     if (Subtarget.hasVSX()) {
 535       setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
 536       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
 537
 538       setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
 539       setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
 540       setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
 541       setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
 542       setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
 543
 544       setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
 545
 546       setOperationAction(ISD::MUL, MVT::v2f64, Legal);
 547       setOperationAction(ISD::FMA, MVT::v2f64, Legal);
 548
 549       setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
 550       setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
 551
 552       setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
 553       setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
 554       setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
 555       setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
 556       setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
 557
 558       // Share the Altivec comparison restrictions.
 559       setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
 560       setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
 561       setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
 562       setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
 563
 564       setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
 565       setOperationAction(ISD::STORE, MVT::v2f64, Legal);
 566
 567       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
 568
 569       addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
 570
 571       addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
 572       addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
 573
 574       // VSX v2i64 only supports non-arithmetic operations.
 575       setOperationAction(ISD::ADD, MVT::v2i64, Expand);
 576       setOperationAction(ISD::SUB, MVT::v2i64, Expand);
 577
 578       setOperationAction(ISD::SHL, MVT::v2i64, Expand);
 579       setOperationAction(ISD::SRA, MVT::v2i64, Expand);
 580       setOperationAction(ISD::SRL, MVT::v2i64, Expand);
 581
 582       setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
 583
 584       setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
 585       AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
 586       setOperationAction(ISD::STORE, MVT::v2i64, Promote);
 587       AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
 588
 589       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
 590
 591       setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
 592       setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
 593       setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
 594       setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
 595
 596       // Vector operation legalization checks the result type of
 597       // SIGN_EXTEND_INREG, overall legalization checks the inner type.
 598       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
 599       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
 600       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
 601       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
 602
 603       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
 604     }
 605   }
 606
 607   if (Subtarget.has64BitSupport()) {
 608     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
 609     setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
 610   }
 611
 612   setOperationAction(ISD::ATOMIC_LOAD,  MVT::i32, Expand);
 613   setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
 614   setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
 615   setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
 616
 617   setBooleanContents(ZeroOrOneBooleanContent);
 618   // Altivec instructions set fields to all zeros or all ones.
 619   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
 620
 621   if (!isPPC64) {
 622     // These libcalls are not available in 32-bit.
 623     setLibcallName(RTLIB::SHL_I128, nullptr);
 624     setLibcallName(RTLIB::SRL_I128, nullptr);
 625     setLibcallName(RTLIB::SRA_I128, nullptr);
 626   }
 627
 628   if (isPPC64) {
 629     setStackPointerRegisterToSaveRestore(PPC::X1);
 630     setExceptionPointerRegister(PPC::X3);
 631     setExceptionSelectorRegister(PPC::X4);
 632   } else {
 633     setStackPointerRegisterToSaveRestore(PPC::R1);
 634     setExceptionPointerRegister(PPC::R3);
 635     setExceptionSelectorRegister(PPC::R4);
 636   }
 637
 638   // We have target-specific dag combine patterns for the following nodes:
 639   setTargetDAGCombine(ISD::SINT_TO_FP);
 640   setTargetDAGCombine(ISD::LOAD);
 641   setTargetDAGCombine(ISD::STORE);
 642   setTargetDAGCombine(ISD::BR_CC);
 643   if (Subtarget.useCRBits())
 644     setTargetDAGCombine(ISD::BRCOND);
 645   setTargetDAGCombine(ISD::BSWAP);
 646   setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
 647
 648   setTargetDAGCombine(ISD::SIGN_EXTEND);
 649   setTargetDAGCombine(ISD::ZERO_EXTEND);
 650   setTargetDAGCombine(ISD::ANY_EXTEND);
 651
 652   if (Subtarget.useCRBits()) {
 653     setTargetDAGCombine(ISD::TRUNCATE);
 654     setTargetDAGCombine(ISD::SETCC);
 655     setTargetDAGCombine(ISD::SELECT_CC);
 656   }
 657
 658   // Use reciprocal estimates.
 659   if (TM.Options.UnsafeFPMath) {
 660     setTargetDAGCombine(ISD::FDIV);
 661     setTargetDAGCombine(ISD::FSQRT);
 662   }
 663
 664   // Darwin long double math library functions have $LDBL128 appended.
 665   if (Subtarget.isDarwin()) {
 666     setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
 667     setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
 668     setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
 669     setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
 670     setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
 671     setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
 672     setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
 673     setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
 674     setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
 675     setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
 676   }
 677
 678   // With 32 condition bits, we don't need to sink (and duplicate) compares
 679   // aggressively in CodeGenPrep.
 680   if (Subtarget.useCRBits())
 681     setHasMultipleConditionRegisters();
 682
 683   setMinFunctionAlignment(2);
 684   if (Subtarget.isDarwin())
 685     setPrefFunctionAlignment(4);
 686
 687   if (isPPC64 && Subtarget.isJITCodeModel())
 688     // Temporary workaround for the inability of PPC64 JIT to handle jump
 689     // tables.
 690     setSupportJumpTables(false);
 691
 692   setInsertFencesForAtomic(true);
 693
 694   if (Subtarget.enableMachineScheduler())
 695     setSchedulingPreference(Sched::Source);
 696   else
 697     setSchedulingPreference(Sched::Hybrid);
 698
 699   computeRegisterProperties();
 700
 701   // The Freescale cores does better with aggressive inlining of memcpy and
 702   // friends. Gcc uses same threshold of 128 bytes (= 32 word stores).
 703   if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
 704       Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
 705     MaxStoresPerMemset = 32;
 706     MaxStoresPerMemsetOptSize = 16;
 707     MaxStoresPerMemcpy = 32;
 708     MaxStoresPerMemcpyOptSize = 8;
 709     MaxStoresPerMemmove = 32;
 710     MaxStoresPerMemmoveOptSize = 8;
 711
 712     setPrefFunctionAlignment(4);
 713   }
 714 }
 715
 716 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
 717 /// the desired ByVal argument alignment.
 718 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
 719                              unsigned MaxMaxAlign) {
 720   if (MaxAlign == MaxMaxAlign)
 721     return;
 722   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
 723     if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
 724       MaxAlign = 32;
 725     else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
 726       MaxAlign = 16;
 727   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
 728     unsigned EltAlign = 0;
 729     getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
 730     if (EltAlign > MaxAlign)
 731       MaxAlign = EltAlign;
 732   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
 733     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
 734       unsigned EltAlign = 0;
 735       getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
 736       if (EltAlign > MaxAlign)
 737         MaxAlign = EltAlign;
 738       if (MaxAlign == MaxMaxAlign)
 739         break;
 740     }
 741   }
 742 }
 743
 744 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
 745 /// function arguments in the caller parameter area.
 746 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
 747   // Darwin passes everything on 4 byte boundary.
 748   if (Subtarget.isDarwin())
 749     return 4;
 750
 751   // 16byte and wider vectors are passed on 16byte boundary.
 752   // The rest is 8 on PPC64 and 4 on PPC32 boundary.
 753   unsigned Align = Subtarget.isPPC64() ? 8 : 4;
 754   if (Subtarget.hasAltivec() || Subtarget.hasQPX())
 755     getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
 756   return Align;
 757 }
 758
 759 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
 760   switch (Opcode) {
 761   default: return nullptr;
 762   case PPCISD::FSEL:            return "PPCISD::FSEL";
 763   case PPCISD::FCFID:           return "PPCISD::FCFID";
 764   case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
 765   case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
 766   case PPCISD::FRE:             return "PPCISD::FRE";
 767   case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
 768   case PPCISD::STFIWX:          return "PPCISD::STFIWX";
 769   case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
 770   case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
 771   case PPCISD::VPERM:           return "PPCISD::VPERM";
 772   case PPCISD::Hi:              return "PPCISD::Hi";
 773   case PPCISD::Lo:              return "PPCISD::Lo";
 774   case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
 775   case PPCISD::LOAD:            return "PPCISD::LOAD";
 776   case PPCISD::LOAD_TOC:        return "PPCISD::LOAD_TOC";
 777   case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
 778   case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
 779   case PPCISD::SRL:             return "PPCISD::SRL";
 780   case PPCISD::SRA:             return "PPCISD::SRA";
 781   case PPCISD::SHL:             return "PPCISD::SHL";
 782   case PPCISD::CALL:            return "PPCISD::CALL";
 783   case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
 784   case PPCISD::MTCTR:           return "PPCISD::MTCTR";
 785   case PPCISD::BCTRL:           return "PPCISD::BCTRL";
 786   case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
 787   case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
 788   case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
 789   case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
 790   case PPCISD::VCMP:            return "PPCISD::VCMP";
 791   case PPCISD::VCMPo:           return "PPCISD::VCMPo";
 792   case PPCISD::LBRX:            return "PPCISD::LBRX";
 793   case PPCISD::STBRX:           return "PPCISD::STBRX";
 794   case PPCISD::LARX:            return "PPCISD::LARX";
 795   case PPCISD::STCX:            return "PPCISD::STCX";
 796   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
 797   case PPCISD::BDNZ:            return "PPCISD::BDNZ";
 798   case PPCISD::BDZ:             return "PPCISD::BDZ";
 799   case PPCISD::MFFS:            return "PPCISD::MFFS";
 800   case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
 801   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
 802   case PPCISD::CR6SET:          return "PPCISD::CR6SET";
 803   case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
 804   case PPCISD::ADDIS_TOC_HA:    return "PPCISD::ADDIS_TOC_HA";
 805   case PPCISD::LD_TOC_L:        return "PPCISD::LD_TOC_L";
 806   case PPCISD::ADDI_TOC_L:      return "PPCISD::ADDI_TOC_L";
 807   case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
 808   case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
 809   case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
 810   case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
 811   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
 812   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
 813   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
 814   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
 815   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
 816   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
 817   case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
 818   case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
 819   case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
 820   case PPCISD::SC:              return "PPCISD::SC";
 821   }
 822 }
 823
 824 EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
 825   if (!VT.isVector())
 826     return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
 827   return VT.changeVectorElementTypeToInteger();
 828 }
 829
 830 //===----------------------------------------------------------------------===//
 831 // Node matching predicates, for use by the tblgen matching code.
 832 //===----------------------------------------------------------------------===//
 833
 834 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
 835 static bool isFloatingPointZero(SDValue Op) {
 836   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
 837     return CFP->getValueAPF().isZero();
 838   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
 839     // Maybe this has already been legalized into the constant pool?
 840     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
 841       if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
 842         return CFP->getValueAPF().isZero();
 843   }
 844   return false;
 845 }
 846
 847 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
 848 /// true if Op is undef or if it matches the specified value.
 849 static bool isConstantOrUndef(int Op, int Val) {
 850   return Op < 0 || Op == Val;
 851 }
 852
 853 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
 854 /// VPKUHUM instruction.
 855 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
 856                                SelectionDAG &DAG) {
 857   unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 1;
 858   if (!isUnary) {
 859     for (unsigned i = 0; i != 16; ++i)
 860       if (!isConstantOrUndef(N->getMaskElt(i),  i*2+j))
 861         return false;
 862   } else {
 863     for (unsigned i = 0; i != 8; ++i)
 864       if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
 865           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
 866         return false;
 867   }
 868   return true;
 869 }
 870
 871 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
 872 /// VPKUWUM instruction.
 873 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary,
 874                                SelectionDAG &DAG) {
 875   unsigned j, k;
 876   if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
 877     j = 0;
 878     k = 1;
 879   } else {
 880     j = 2;
 881     k = 3;
 882   }
 883   if (!isUnary) {
 884     for (unsigned i = 0; i != 16; i += 2)
 885       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j) ||
 886           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+k))
 887         return false;
 888   } else {
 889     for (unsigned i = 0; i != 8; i += 2)
 890       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j) ||
 891           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+k) ||
 892           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j) ||
 893           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+k))
 894         return false;
 895   }
 896   return true;
 897 }
 898
 899 /// isVMerge - Common function, used to match vmrg* shuffles.
 900 ///
 901 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
 902                      unsigned LHSStart, unsigned RHSStart) {
 903   if (N->getValueType(0) != MVT::v16i8)
 904     return false;
 905   assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
 906          "Unsupported merge size!");
 907
 908   for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
 909     for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
 910       if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
 911                              LHSStart+j+i*UnitSize) ||
 912           !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
 913                              RHSStart+j+i*UnitSize))
 914         return false;
 915     }
 916   return true;
 917 }
 918
 919 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
 920 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
 921 /// The ShuffleKind distinguishes between big-endian merges with two
 922 /// different inputs (0), either-endian merges with two identical inputs (1),
 923 /// and little-endian merges with two different inputs (2).  For the latter,
 924 /// the input operands are swapped (see PPCInstrAltivec.td).
 925 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
 926                              unsigned ShuffleKind, SelectionDAG &DAG) {
 927   if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
 928     if (ShuffleKind == 1) // unary
 929       return isVMerge(N, UnitSize, 0, 0);
 930     else if (ShuffleKind == 2) // swapped
 931       return isVMerge(N, UnitSize, 0, 16);
 932     else
 933       return false;
 934   } else {
 935     if (ShuffleKind == 1) // unary
 936       return isVMerge(N, UnitSize, 8, 8);
 937     else if (ShuffleKind == 0) // normal
 938       return isVMerge(N, UnitSize, 8, 24);
 939     else
 940       return false;
 941   }
 942 }
 943
 944 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
 945 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
 946 /// The ShuffleKind distinguishes between big-endian merges with two
 947 /// different inputs (0), either-endian merges with two identical inputs (1),
 948 /// and little-endian merges with two different inputs (2).  For the latter,
 949 /// the input operands are swapped (see PPCInstrAltivec.td).
 950 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
 951                              unsigned ShuffleKind, SelectionDAG &DAG) {
 952   if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
 953     if (ShuffleKind == 1) // unary
 954       return isVMerge(N, UnitSize, 8, 8);
 955     else if (ShuffleKind == 2) // swapped
 956       return isVMerge(N, UnitSize, 8, 24);
 957     else
 958       return false;
 959   } else {
 960     if (ShuffleKind == 1) // unary
 961       return isVMerge(N, UnitSize, 0, 0);
 962     else if (ShuffleKind == 0) // normal
 963       return isVMerge(N, UnitSize, 0, 16);
 964     else
 965       return false;
 966   }
 967 }
 968
 969
 970 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
 971 /// amount, otherwise return -1.
 972 int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG) {
 973   if (N->getValueType(0) != MVT::v16i8)
 974     return -1;
 975
 976   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
 977
 978   // Find the first non-undef value in the shuffle mask.
 979   unsigned i;
 980   for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
 981     /*search*/;
 982
 983   if (i == 16) return -1;  // all undef.
 984
 985   // Otherwise, check to see if the rest of the elements are consecutively
 986   // numbered from this value.
 987   unsigned ShiftAmt = SVOp->getMaskElt(i);
 988   if (ShiftAmt < i) return -1;
 989
 990   if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
 991
 992     ShiftAmt += i;
 993
 994     if (!isUnary) {
 995       // Check the rest of the elements to see if they are consecutive.
 996       for (++i; i != 16; ++i)
 997         if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt - i))
 998           return -1;
 999     } else {
1000       // Check the rest of the elements to see if they are consecutive.
1001       for (++i; i != 16; ++i)
1002         if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt - i) & 15))
1003           return -1;
1004     }
1005
1006   } else {  // Big Endian
1007
1008     ShiftAmt -= i;
1009
1010     if (!isUnary) {
1011       // Check the rest of the elements to see if they are consecutive.
1012       for (++i; i != 16; ++i)
1013         if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1014           return -1;
1015     } else {
1016       // Check the rest of the elements to see if they are consecutive.
1017       for (++i; i != 16; ++i)
1018         if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1019           return -1;
1020     }
1021   }
1022   return ShiftAmt;
1023 }
1024
1025 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1026 /// specifies a splat of a single element that is suitable for input to
1027 /// VSPLTB/VSPLTH/VSPLTW.
1028 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1029   assert(N->getValueType(0) == MVT::v16i8 &&
1030          (EltSize == 1 || EltSize == 2 || EltSize == 4));
1031
1032   // This is a splat operation if each element of the permute is the same, and
1033   // if the value doesn't reference the second vector.
1034   unsigned ElementBase = N->getMaskElt(0);
1035
1036   // FIXME: Handle UNDEF elements too!
1037   if (ElementBase >= 16)
1038     return false;
1039
1040   // Check that the indices are consecutive, in the case of a multi-byte element
1041   // splatted with a v16i8 mask.
1042   for (unsigned i = 1; i != EltSize; ++i)
1043     if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1044       return false;
1045
1046   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1047     if (N->getMaskElt(i) < 0) continue;
1048     for (unsigned j = 0; j != EltSize; ++j)
1049       if (N->getMaskElt(i+j) != N->getMaskElt(j))
1050         return false;
1051   }
1052   return true;
1053 }
1054
1055 /// isAllNegativeZeroVector - Returns true if all elements of build_vector
1056 /// are -0.0.
1057 bool PPC::isAllNegativeZeroVector(SDNode *N) {
1058   BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
1059
1060   APInt APVal, APUndef;
1061   unsigned BitSize;
1062   bool HasAnyUndefs;
1063
1064   if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
1065     if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
1066       return CFP->getValueAPF().isNegZero();
1067
1068   return false;
1069 }
1070
1071 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
1072 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
1073 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
1074                                 SelectionDAG &DAG) {
1075   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1076   assert(isSplatShuffleMask(SVOp, EltSize));
1077   if (DAG.getTarget().getDataLayout()->isLittleEndian())
1078     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
1079   else
1080     return SVOp->getMaskElt(0) / EltSize;
1081 }
1082
1083 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
1084 /// by using a vspltis[bhw] instruction of the specified element size, return
1085 /// the constant being splatted.  The ByteSize field indicates the number of
1086 /// bytes of each element [124] -> [bhw].
1087 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
1088   SDValue OpVal(nullptr, 0);
1089
1090   // If ByteSize of the splat is bigger than the element size of the
1091   // build_vector, then we have a case where we are checking for a splat where
1092   // multiple elements of the buildvector are folded together into a single
1093   // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
1094   unsigned EltSize = 16/N->getNumOperands();
1095   if (EltSize < ByteSize) {
1096     unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
1097     SDValue UniquedVals[4];
1098     assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
1099
1100     // See if all of the elements in the buildvector agree across.
1101     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1102       if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1103       // If the element isn't a constant, bail fully out.
1104       if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
1105
1106
1107       if (!UniquedVals[i&(Multiple-1)].getNode())
1108         UniquedVals[i&(Multiple-1)] = N->getOperand(i);
1109       else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
1110         return SDValue();  // no match.
1111     }
1112
1113     // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
1114     // either constant or undef values that are identical for each chunk.  See
1115     // if these chunks can form into a larger vspltis*.
1116
1117     // Check to see if all of the leading entries are either 0 or -1.  If
1118     // neither, then this won't fit into the immediate field.
1119     bool LeadingZero = true;
1120     bool LeadingOnes = true;
1121     for (unsigned i = 0; i != Multiple-1; ++i) {
1122       if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.
1123
1124       LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
1125       LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
1126     }
1127     // Finally, check the least significant entry.
1128     if (LeadingZero) {
1129       if (!UniquedVals[Multiple-1].getNode())
1130         return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
1131       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
1132       if (Val < 16)
1133         return DAG.getTargetConstant(Val, MVT::i32);  // 0,0,0,4 -> vspltisw(4)
1134     }
1135     if (LeadingOnes) {
1136       if (!UniquedVals[Multiple-1].getNode())
1137         return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
1138       int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
1139       if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
1140         return DAG.getTargetConstant(Val, MVT::i32);
1141     }
1142
1143     return SDValue();
1144   }
1145
1146   // Check to see if this buildvec has a single non-undef value in its elements.
1147   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1148     if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1149     if (!OpVal.getNode())
1150       OpVal = N->getOperand(i);
1151     else if (OpVal != N->getOperand(i))
1152       return SDValue();
1153   }
1154
1155   if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.
1156
1157   unsigned ValSizeInBytes = EltSize;
1158   uint64_t Value = 0;
1159   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1160     Value = CN->getZExtValue();
1161   } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1162     assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
1163     Value = FloatToBits(CN->getValueAPF().convertToFloat());
1164   }
1165
1166   // If the splat value is larger than the element value, then we can never do
1167   // this splat.  The only case that we could fit the replicated bits into our
1168   // immediate field for would be zero, and we prefer to use vxor for it.
1169   if (ValSizeInBytes < ByteSize) return SDValue();
1170
1171   // If the element value is larger than the splat value, cut it in half and
1172   // check to see if the two halves are equal.  Continue doing this until we
1173   // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
1174   while (ValSizeInBytes > ByteSize) {
1175     ValSizeInBytes >>= 1;
1176
1177     // If the top half equals the bottom half, we're still ok.
1178     if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
1179          (Value                        & ((1 << (8*ValSizeInBytes))-1)))
1180       return SDValue();
1181   }
1182
1183   // Properly sign extend the value.
1184   int MaskVal = SignExtend32(Value, ByteSize * 8);
1185
1186   // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
1187   if (MaskVal == 0) return SDValue();
1188
1189   // Finally, if this value fits in a 5 bit sext field, return it
1190   if (SignExtend32<5>(MaskVal) == MaskVal)
1191     return DAG.getTargetConstant(MaskVal, MVT::i32);
1192   return SDValue();
1193 }
1194
1195 //===----------------------------------------------------------------------===//
1196 //  Addressing Mode Selection
1197 //===----------------------------------------------------------------------===//
1198
1199 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
1200 /// or 64-bit immediate, and if the value can be accurately represented as a
1201 /// sign extension from a 16-bit value.  If so, this returns true and the
1202 /// immediate.
1203 static bool isIntS16Immediate(SDNode *N, short &Imm) {
1204   if (!isa<ConstantSDNode>(N))
1205     return false;
1206
1207   Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
1208   if (N->getValueType(0) == MVT::i32)
1209     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
1210   else
1211     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
1212 }
1213 static bool isIntS16Immediate(SDValue Op, short &Imm) {
1214   return isIntS16Immediate(Op.getNode(), Imm);
1215 }
1216
1217
1218 /// SelectAddressRegReg - Given the specified addressed, check to see if it
1219 /// can be represented as an indexed [r+r] operation.  Returns false if it
1220 /// can be more efficiently represented with [r+imm].
1221 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
1222                                             SDValue &Index,
1223                                             SelectionDAG &DAG) const {
1224   short imm = 0;
1225   if (N.getOpcode() == ISD::ADD) {
1226     if (isIntS16Immediate(N.getOperand(1), imm))
1227       return false;    // r+i
1228     if (N.getOperand(1).getOpcode() == PPCISD::Lo)
1229       return false;    // r+i
1230
1231     Base = N.getOperand(0);
1232     Index = N.getOperand(1);
1233     return true;
1234   } else if (N.getOpcode() == ISD::OR) {
1235     if (isIntS16Immediate(N.getOperand(1), imm))
1236       return false;    // r+i can fold it if we can.
1237
1238     // If this is an or of disjoint bitfields, we can codegen this as an add
1239     // (for better address arithmetic) if the LHS and RHS of the OR are provably
1240     // disjoint.
1241     APInt LHSKnownZero, LHSKnownOne;
1242     APInt RHSKnownZero, RHSKnownOne;
1243     DAG.computeKnownBits(N.getOperand(0),
1244                          LHSKnownZero, LHSKnownOne);
1245
1246     if (LHSKnownZero.getBoolValue()) {
1247       DAG.computeKnownBits(N.getOperand(1),
1248                            RHSKnownZero, RHSKnownOne);
1249       // If all of the bits are known zero on the LHS or RHS, the add won't
1250       // carry.
1251       if (~(LHSKnownZero | RHSKnownZero) == 0) {
1252         Base = N.getOperand(0);
1253         Index = N.getOperand(1);
1254         return true;
1255       }
1256     }
1257   }
1258
1259   return false;
1260 }
1261
1262 // If we happen to be doing an i64 load or store into a stack slot that has
1263 // less than a 4-byte alignment, then the frame-index elimination may need to
1264 // use an indexed load or store instruction (because the offset may not be a
1265 // multiple of 4). The extra register needed to hold the offset comes from the
1266 // register scavenger, and it is possible that the scavenger will need to use
1267 // an emergency spill slot. As a result, we need to make sure that a spill slot
1268 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
1269 // stack slot.
1270 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
1271   // FIXME: This does not handle the LWA case.
1272   if (VT != MVT::i64)
1273     return;
1274
1275   // NOTE: We'll exclude negative FIs here, which come from argument
1276   // lowering, because there are no known test cases triggering this problem
1277   // using packed structures (or similar). We can remove this exclusion if
1278   // we find such a test case. The reason why this is so test-case driven is
1279   // because this entire 'fixup' is only to prevent crashes (from the
1280   // register scavenger) on not-really-valid inputs. For example, if we have:
1281   //   %a = alloca i1
1282   //   %b = bitcast i1* %a to i64*
1283   //   store i64* a, i64 b
1284   // then the store should really be marked as 'align 1', but is not. If it
1285   // were marked as 'align 1' then the indexed form would have been
1286   // instruction-selected initially, and the problem this 'fixup' is preventing
1287   // won't happen regardless.
1288   if (FrameIdx < 0)
1289     return;
1290
1291   MachineFunction &MF = DAG.getMachineFunction();
1292   MachineFrameInfo *MFI = MF.getFrameInfo();
1293
1294   unsigned Align = MFI->getObjectAlignment(FrameIdx);
1295   if (Align >= 4)
1296     return;
1297
1298   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1299   FuncInfo->setHasNonRISpills();
1300 }
1301
1302 /// Returns true if the address N can be represented by a base register plus
1303 /// a signed 16-bit displacement [r+imm], and if it is not better
1304 /// represented as reg+reg.  If Aligned is true, only accept displacements
1305 /// suitable for STD and friends, i.e. multiples of 4.
1306 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
1307                                             SDValue &Base,
1308                                             SelectionDAG &DAG,
1309                                             bool Aligned) const {
1310   // FIXME dl should come from parent load or store, not from address
1311   SDLoc dl(N);
1312   // If this can be more profitably realized as r+r, fail.
1313   if (SelectAddressRegReg(N, Disp, Base, DAG))
1314     return false;
1315
1316   if (N.getOpcode() == ISD::ADD) {
1317     short imm = 0;
1318     if (isIntS16Immediate(N.getOperand(1), imm) &&
1319         (!Aligned || (imm & 3) == 0)) {
1320       Disp = DAG.getTargetConstant(imm, N.getValueType());
1321       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
1322         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1323         fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
1324       } else {
1325         Base = N.getOperand(0);
1326       }
1327       return true; // [r+i]
1328     } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
1329       // Match LOAD (ADD (X, Lo(G))).
1330       assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
1331              && "Cannot handle constant offsets yet!");
1332       Disp = N.getOperand(1).getOperand(0);  // The global address.
1333       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
1334              Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
1335              Disp.getOpcode() == ISD::TargetConstantPool ||
1336              Disp.getOpcode() == ISD::TargetJumpTable);
1337       Base = N.getOperand(0);
1338       return true;  // [&g+r]
1339     }
1340   } else if (N.getOpcode() == ISD::OR) {
1341     short imm = 0;
1342     if (isIntS16Immediate(N.getOperand(1), imm) &&
1343         (!Aligned || (imm & 3) == 0)) {
1344       // If this is an or of disjoint bitfields, we can codegen this as an add
1345       // (for better address arithmetic) if the LHS and RHS of the OR are
1346       // provably disjoint.
1347       APInt LHSKnownZero, LHSKnownOne;
1348       DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
1349
1350       if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
1351         // If all of the bits are known zero on the LHS or RHS, the add won't
1352         // carry.
1353         if (FrameIndexSDNode *FI =
1354               dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
1355           Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1356           fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
1357         } else {
1358           Base = N.getOperand(0);
1359         }
1360         Disp = DAG.getTargetConstant(imm, N.getValueType());
1361         return true;
1362       }
1363     }
1364   } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
1365     // Loading from a constant address.
1366
1367     // If this address fits entirely in a 16-bit sext immediate field, codegen
1368     // this as "d, 0"
1369     short Imm;
1370     if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
1371       Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
1372       Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
1373                              CN->getValueType(0));
1374       return true;
1375     }
1376
1377     // Handle 32-bit sext immediates with LIS + addr mode.
1378     if ((CN->getValueType(0) == MVT::i32 ||
1379          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
1380         (!Aligned || (CN->getZExtValue() & 3) == 0)) {
1381       int Addr = (int)CN->getZExtValue();
1382
1383       // Otherwise, break this down into an LIS + disp.
1384       Disp = DAG.getTargetConstant((short)Addr, MVT::i32);
1385
1386       Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
1387       unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
1388       Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
1389       return true;
1390     }
1391   }
1392
1393   Disp = DAG.getTargetConstant(0, getPointerTy());
1394   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
1395     Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1396     fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
1397   } else
1398     Base = N;
1399   return true;      // [r+0]
1400 }
1401
1402 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
1403 /// represented as an indexed [r+r] operation.
1404 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
1405                                                 SDValue &Index,
1406                                                 SelectionDAG &DAG) const {
1407   // Check to see if we can easily represent this as an [r+r] address.  This
1408   // will fail if it thinks that the address is more profitably represented as
1409   // reg+imm, e.g. where imm = 0.
1410   if (SelectAddressRegReg(N, Base, Index, DAG))
1411     return true;
1412
1413   // If the operand is an addition, always emit this as [r+r], since this is
1414   // better (for code size, and execution, as the memop does the add for free)
1415   // than emitting an explicit add.
1416   if (N.getOpcode() == ISD::ADD) {
1417     Base = N.getOperand(0);
1418     Index = N.getOperand(1);
1419     return true;
1420   }
1421
1422   // Otherwise, do it the hard way, using R0 as the base register.
1423   Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
1424                          N.getValueType());
1425   Index = N;
1426   return true;
1427 }
1428
1429 /// getPreIndexedAddressParts - returns true by value, base pointer and
1430 /// offset pointer and addressing mode by reference if the node's address
1431 /// can be legally represented as pre-indexed load / store address.
1432 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
1433                                                   SDValue &Offset,
1434                                                   ISD::MemIndexedMode &AM,
1435                                                   SelectionDAG &DAG) const {
1436   if (DisablePPCPreinc) return false;
1437
1438   bool isLoad = true;
1439   SDValue Ptr;
1440   EVT VT;
1441   unsigned Alignment;
1442   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1443     Ptr = LD->getBasePtr();
1444     VT = LD->getMemoryVT();
1445     Alignment = LD->getAlignment();
1446   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
1447     Ptr = ST->getBasePtr();
1448     VT  = ST->getMemoryVT();
1449     Alignment = ST->getAlignment();
1450     isLoad = false;
1451   } else
1452     return false;
1453
1454   // PowerPC doesn't have preinc load/store instructions for vectors.
1455   if (VT.isVector())
1456     return false;
1457
1458   if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
1459
1460     // Common code will reject creating a pre-inc form if the base pointer
1461     // is a frame index, or if N is a store and the base pointer is either
1462     // the same as or a predecessor of the value being stored.  Check for
1463     // those situations here, and try with swapped Base/Offset instead.
1464     bool Swap = false;
1465
1466     if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
1467       Swap = true;
1468     else if (!isLoad) {
1469       SDValue Val = cast<StoreSDNode>(N)->getValue();
1470       if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
1471         Swap = true;
1472     }
1473
1474     if (Swap)
1475       std::swap(Base, Offset);
1476
1477     AM = ISD::PRE_INC;
1478     return true;
1479   }
1480
1481   // LDU/STU can only handle immediates that are a multiple of 4.
1482   if (VT != MVT::i64) {
1483     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
1484       return false;
1485   } else {
1486     // LDU/STU need an address with at least 4-byte alignment.
1487     if (Alignment < 4)
1488       return false;
1489
1490     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
1491       return false;
1492   }
1493
1494   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1495     // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
1496     // sext i32 to i64 when addr mode is r+i.
1497     if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
1498         LD->getExtensionType() == ISD::SEXTLOAD &&
1499         isa<ConstantSDNode>(Offset))
1500       return false;
1501   }
1502
1503   AM = ISD::PRE_INC;
1504   return true;
1505 }
1506
1507 //===----------------------------------------------------------------------===//
1508 //  LowerOperation implementation
1509 //===----------------------------------------------------------------------===//
1510
1511 /// GetLabelAccessInfo - Return true if we should reference labels using a
1512 /// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
1513 static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
1514                                unsigned &LoOpFlags,
1515                                const GlobalValue *GV = nullptr) {
1516   HiOpFlags = PPCII::MO_HA;
1517   LoOpFlags = PPCII::MO_LO;
1518
1519   // Don't use the pic base if not in PIC relocation model.
1520   bool isPIC = TM.getRelocationModel() == Reloc::PIC_;
1521
1522   if (isPIC) {
1523     HiOpFlags |= PPCII::MO_PIC_FLAG;
1524     LoOpFlags |= PPCII::MO_PIC_FLAG;
1525   }
1526
1527   // If this is a reference to a global value that requires a non-lazy-ptr, make
1528   // sure that instruction lowering adds it.
1529   if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
1530     HiOpFlags |= PPCII::MO_NLP_FLAG;
1531     LoOpFlags |= PPCII::MO_NLP_FLAG;
1532
1533     if (GV->hasHiddenVisibility()) {
1534       HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
1535       LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
1536     }
1537   }
1538
1539   return isPIC;
1540 }
1541
1542 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
1543                              SelectionDAG &DAG) {
1544   EVT PtrVT = HiPart.getValueType();
1545   SDValue Zero = DAG.getConstant(0, PtrVT);
1546   SDLoc DL(HiPart);
1547
1548   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
1549   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
1550
1551   // With PIC, the first instruction is actually "GR+hi(&G)".
1552   if (isPIC)
1553     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
1554                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
1555
1556   // Generate non-pic code that has direct accesses to the constant pool.
1557   // The address of the global is just (hi(&g)+lo(&g)).
1558   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
1559 }
1560
1561 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
1562                                              SelectionDAG &DAG) const {
1563   EVT PtrVT = Op.getValueType();
1564   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1565   const Constant *C = CP->getConstVal();
1566
1567   // 64-bit SVR4 ABI code is always position-independent.
1568   // The actual address of the GlobalValue is stored in the TOC.
1569   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
1570     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
1571     return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
1572                        DAG.getRegister(PPC::X2, MVT::i64));
1573   }
1574
1575   unsigned MOHiFlag, MOLoFlag;
1576   bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
1577
1578   if (isPIC && Subtarget.isSVR4ABI()) {
1579     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
1580                                            PPCII::MO_PIC_FLAG);
1581     SDLoc DL(CP);
1582     return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
1583                        DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
1584   }
1585
1586   SDValue CPIHi =
1587     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
1588   SDValue CPILo =
1589     DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
1590   return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
1591 }
1592
1593 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1594   EVT PtrVT = Op.getValueType();
1595   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
1596
1597   // 64-bit SVR4 ABI code is always position-independent.
1598   // The actual address of the GlobalValue is stored in the TOC.
1599   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
1600     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
1601     return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
1602                        DAG.getRegister(PPC::X2, MVT::i64));
1603   }
1604
1605   unsigned MOHiFlag, MOLoFlag;
1606   bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
1607
1608   if (isPIC && Subtarget.isSVR4ABI()) {
1609     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
1610                                         PPCII::MO_PIC_FLAG);
1611     SDLoc DL(GA);
1612     return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), PtrVT, GA,
1613                        DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
1614   }
1615
1616   SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
1617   SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
1618   return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
1619 }
1620
1621 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
1622                                              SelectionDAG &DAG) const {
1623   EVT PtrVT = Op.getValueType();
1624
1625   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
1626
1627   unsigned MOHiFlag, MOLoFlag;
1628   bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
1629   SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
1630   SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
1631   return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
1632 }
1633
1634 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1635                                               SelectionDAG &DAG) const {
1636
1637   // FIXME: TLS addresses currently use medium model code sequences,
1638   // which is the most useful form.  Eventually support for small and
1639   // large models could be added if users need it, at the cost of
1640   // additional complexity.
1641   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
1642   SDLoc dl(GA);
1643   const GlobalValue *GV = GA->getGlobal();
1644   EVT PtrVT = getPointerTy();
1645   bool is64bit = Subtarget.isPPC64();
1646
1647   TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
1648
1649   if (Model == TLSModel::LocalExec) {
1650     SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
1651                                                PPCII::MO_TPREL_HA);
1652     SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
1653                                                PPCII::MO_TPREL_LO);
1654     SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
1655                                      is64bit ? MVT::i64 : MVT::i32);
1656     SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
1657     return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
1658   }
1659
1660   if (Model == TLSModel::InitialExec) {
1661     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
1662     SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
1663                                                 PPCII::MO_TLS);
1664     SDValue GOTPtr;
1665     if (is64bit) {
1666       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
1667       GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
1668                            PtrVT, GOTReg, TGA);
1669     } else
1670       GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
1671     SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
1672                                    PtrVT, TGA, GOTPtr);
1673     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
1674   }
1675
1676   if (Model == TLSModel::GeneralDynamic) {
1677     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
1678     SDValue GOTPtr;
1679     if (is64bit) {
1680       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
1681       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
1682                                    GOTReg, TGA);
1683     } else {
1684       GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
1685     }
1686     SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
1687                                    GOTPtr, TGA);
1688
1689     // We need a chain node, and don't have one handy.  The underlying
1690     // call has no side effects, so using the function entry node
1691     // suffices.
1692     SDValue Chain = DAG.getEntryNode();
1693     Chain = DAG.getCopyToReg(Chain, dl,
1694                              is64bit ? PPC::X3 : PPC::R3, GOTEntry);
1695     SDValue ParmReg = DAG.getRegister(is64bit ? PPC::X3 : PPC::R3,
1696                                       is64bit ? MVT::i64 : MVT::i32);
1697     SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl,
1698                                   PtrVT, ParmReg, TGA);
1699     // The return value from GET_TLS_ADDR really is in X3 already, but
1700     // some hacks are needed here to tie everything together.  The extra
1701     // copies dissolve during subsequent transforms.
1702     Chain = DAG.getCopyToReg(Chain, dl, is64bit ? PPC::X3 : PPC::R3, TLSAddr);
1703     return DAG.getCopyFromReg(Chain, dl, is64bit ? PPC::X3 : PPC::R3, PtrVT);
1704   }
1705
1706   if (Model == TLSModel::LocalDynamic) {
1707     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
1708     SDValue GOTPtr;
1709     if (is64bit) {
1710       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
1711       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
1712                            GOTReg, TGA);
1713     } else {
1714       GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
1715     }
1716     SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
1717                                    GOTPtr, TGA);
1718
1719     // We need a chain node, and don't have one handy.  The underlying
1720     // call has no side effects, so using the function entry node
1721     // suffices.
1722     SDValue Chain = DAG.getEntryNode();
1723     Chain = DAG.getCopyToReg(Chain, dl,
1724                              is64bit ? PPC::X3 : PPC::R3, GOTEntry);
1725     SDValue ParmReg = DAG.getRegister(is64bit ? PPC::X3 : PPC::R3,
1726                                       is64bit ? MVT::i64 : MVT::i32);
1727     SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl,
1728                                   PtrVT, ParmReg, TGA);
1729     // The return value from GET_TLSLD_ADDR really is in X3 already, but
1730     // some hacks are needed here to tie everything together.  The extra
1731     // copies dissolve during subsequent transforms.
1732     Chain = DAG.getCopyToReg(Chain, dl, is64bit ? PPC::X3 : PPC::R3, TLSAddr);
1733     SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
1734                                       Chain, ParmReg, TGA);
1735     return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
1736   }
1737
1738   llvm_unreachable("Unknown TLS model!");
1739 }
1740
1741 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
1742                                               SelectionDAG &DAG) const {
1743   EVT PtrVT = Op.getValueType();
1744   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
1745   SDLoc DL(GSDN);
1746   const GlobalValue *GV = GSDN->getGlobal();
1747
1748   // 64-bit SVR4 ABI code is always position-independent.
1749   // The actual address of the GlobalValue is stored in the TOC.
1750   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
1751     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
1752     return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
1753                        DAG.getRegister(PPC::X2, MVT::i64));
1754   }
1755
1756   unsigned MOHiFlag, MOLoFlag;
1757   bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);
1758
1759   if (isPIC && Subtarget.isSVR4ABI()) {
1760     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
1761                                             GSDN->getOffset(),
1762                                             PPCII::MO_PIC_FLAG);
1763     return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
1764                        DAG.getNode(PPCISD::GlobalBaseReg, DL, MVT::i32));
1765   }
1766
1767   SDValue GAHi =
1768     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
1769   SDValue GALo =
1770     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
1771
1772   SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);
1773
1774   // If the global reference is actually to a non-lazy-pointer, we have to do an
1775   // extra load to get the address of the global.
1776   if (MOHiFlag & PPCII::MO_NLP_FLAG)
1777     Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
1778                       false, false, false, 0);
1779   return Ptr;
1780 }
1781
1782 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
1783   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1784   SDLoc dl(Op);
1785
1786   if (Op.getValueType() == MVT::v2i64) {
1787     // When the operands themselves are v2i64 values, we need to do something
1788     // special because VSX has no underlying comparison operations for these.
1789     if (Op.getOperand(0).getValueType() == MVT::v2i64) {
1790       // Equality can be handled by casting to the legal type for Altivec
1791       // comparisons, everything else needs to be expanded.
1792       if (CC == ISD::SETEQ || CC == ISD::SETNE) {
1793         return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
1794                  DAG.getSetCC(dl, MVT::v4i32,
1795                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
1796                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
1797                    CC));
1798       }
1799
1800       return SDValue();
1801     }
1802
1803     // We handle most of these in the usual way.
1804     return Op;
1805   }
1806
1807   // If we're comparing for equality to zero, expose the fact that this is
1808   // implented as a ctlz/srl pair on ppc, so that the dag combiner can
1809   // fold the new nodes.
1810   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
1811     if (C->isNullValue() && CC == ISD::SETEQ) {
1812       EVT VT = Op.getOperand(0).getValueType();
1813       SDValue Zext = Op.getOperand(0);
1814       if (VT.bitsLT(MVT::i32)) {
1815         VT = MVT::i32;
1816         Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
1817       }
1818       unsigned Log2b = Log2_32(VT.getSizeInBits());
1819       SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
1820       SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
1821                                 DAG.getConstant(Log2b, MVT::i32));
1822       return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
1823     }
1824     // Leave comparisons against 0 and -1 alone for now, since they're usually
1825     // optimized.  FIXME: revisit this when we can custom lower all setcc
1826     // optimizations.
1827     if (C->isAllOnesValue() || C->isNullValue())
1828       return SDValue();
1829   }
1830
1831   // If we have an integer seteq/setne, turn it into a compare against zero
1832   // by xor'ing the rhs with the lhs, which is faster than setting a
1833   // condition register, reading it back out, and masking the correct bit.  The
1834   // normal approach here uses sub to do this instead of xor.  Using xor exposes
1835   // the result to other bit-twiddling opportunities.
1836   EVT LHSVT = Op.getOperand(0).getValueType();
1837   if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1838     EVT VT = Op.getValueType();
1839     SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
1840                                 Op.getOperand(1));
1841     return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
1842   }
1843   return SDValue();
1844 }
1845
1846 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
1847                                       const PPCSubtarget &Subtarget) const {
1848   SDNode *Node = Op.getNode();
1849   EVT VT = Node->getValueType(0);
1850   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1851   SDValue InChain = Node->getOperand(0);
1852   SDValue VAListPtr = Node->getOperand(1);
1853   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
1854   SDLoc dl(Node);
1855
1856   assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
1857
1858   // gpr_index
1859   SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
1860                                     VAListPtr, MachinePointerInfo(SV), MVT::i8,
1861                                     false, false, false, 0);
1862   InChain = GprIndex.getValue(1);
1863
1864   if (VT == MVT::i64) {
1865     // Check if GprIndex is even
1866     SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
1867                                  DAG.getConstant(1, MVT::i32));
1868     SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
1869                                 DAG.getConstant(0, MVT::i32), ISD::SETNE);
1870     SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
1871                                           DAG.getConstant(1, MVT::i32));
1872     // Align GprIndex to be even if it isn't
1873     GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
1874                            GprIndex);
1875   }
1876
1877   // fpr index is 1 byte after gpr
1878   SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
1879                                DAG.getConstant(1, MVT::i32));
1880
1881   // fpr
1882   SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
1883                                     FprPtr, MachinePointerInfo(SV), MVT::i8,
1884                                     false, false, false, 0);
1885   InChain = FprIndex.getValue(1);
1886
1887   SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
1888                                        DAG.getConstant(8, MVT::i32));
1889
1890   SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
1891                                         DAG.getConstant(4, MVT::i32));
1892
1893   // areas
1894   SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
1895                                      MachinePointerInfo(), false, false,
1896                                      false, 0);
1897   InChain = OverflowArea.getValue(1);
1898
1899   SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
1900                                     MachinePointerInfo(), false, false,
1901                                     false, 0);
1902   InChain = RegSaveArea.getValue(1);
1903
1904   // select overflow_area if index > 8
1905   SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
1906                             DAG.getConstant(8, MVT::i32), ISD::SETLT);
1907
1908   // adjustment constant gpr_index * 4/8
1909   SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
1910                                     VT.isInteger() ? GprIndex : FprIndex,
1911                                     DAG.getConstant(VT.isInteger() ? 4 : 8,
1912                                                     MVT::i32));
1913
1914   // OurReg = RegSaveArea + RegConstant
1915   SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
1916                                RegConstant);
1917
1918   // Floating types are 32 bytes into RegSaveArea
1919   if (VT.isFloatingPoint())
1920     OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
1921                          DAG.getConstant(32, MVT::i32));
1922
1923   // increase {f,g}pr_index by 1 (or 2 if VT is i64)
1924   SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
1925                                    VT.isInteger() ? GprIndex : FprIndex,
1926                                    DAG.getConstant(VT == MVT::i64 ? 2 : 1,
1927                                                    MVT::i32));
1928
1929   InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
1930                               VT.isInteger() ? VAListPtr : FprPtr,
1931                               MachinePointerInfo(SV),
1932                               MVT::i8, false, false, 0);
1933
1934   // determine if we should load from reg_save_area or overflow_area
1935   SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
1936
1937   // increase overflow_area by 4/8 if gpr/fpr > 8
1938   SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
1939                                           DAG.getConstant(VT.isInteger() ? 4 : 8,
1940                                           MVT::i32));
1941
1942   OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
1943                              OverflowAreaPlusN);
1944
1945   InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
1946                               OverflowAreaPtr,
1947                               MachinePointerInfo(),
1948                               MVT::i32, false, false, 0);
1949
1950   return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
1951                      false, false, false, 0);
1952 }
1953
1954 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
1955                                        const PPCSubtarget &Subtarget) const {
1956   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
1957
1958   // We have to copy the entire va_list struct:
1959   // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
1960   return DAG.getMemcpy(Op.getOperand(0), Op,
1961                        Op.getOperand(1), Op.getOperand(2),
1962                        DAG.getConstant(12, MVT::i32), 8, false, true,
1963                        MachinePointerInfo(), MachinePointerInfo());
1964 }
1965
1966 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
1967                                                   SelectionDAG &DAG) const {
1968   return Op.getOperand(0);
1969 }
1970
1971 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
1972                                                 SelectionDAG &DAG) const {
1973   SDValue Chain = Op.getOperand(0);
1974   SDValue Trmp = Op.getOperand(1); // trampoline
1975   SDValue FPtr = Op.getOperand(2); // nested function
1976   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
1977   SDLoc dl(Op);
1978
1979   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1980   bool isPPC64 = (PtrVT == MVT::i64);
1981   Type *IntPtrTy =
1982     DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType(
1983                                                              *DAG.getContext());
1984
1985   TargetLowering::ArgListTy Args;
1986   TargetLowering::ArgListEntry Entry;
1987
1988   Entry.Ty = IntPtrTy;
1989   Entry.Node = Trmp; Args.push_back(Entry);
1990
1991   // TrampSize == (isPPC64 ? 48 : 40);
1992   Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
1993                                isPPC64 ? MVT::i64 : MVT::i32);
1994   Args.push_back(Entry);
1995
1996   Entry.Node = FPtr; Args.push_back(Entry);
1997   Entry.Node = Nest; Args.push_back(Entry);
1998
1999   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
2000   TargetLowering::CallLoweringInfo CLI(DAG);
2001   CLI.setDebugLoc(dl).setChain(Chain)
2002     .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
2003                DAG.getExternalSymbol("__trampoline_setup", PtrVT),
2004                std::move(Args), 0);
2005
2006   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2007   return CallResult.second;
2008 }
2009
2010 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
2011                                         const PPCSubtarget &Subtarget) const {
2012   MachineFunction &MF = DAG.getMachineFunction();
2013   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2014
2015   SDLoc dl(Op);
2016
2017   if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
2018     // vastart just stores the address of the VarArgsFrameIndex slot into the
2019     // memory location argument.
2020     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2021     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2022     const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2023     return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
2024                         MachinePointerInfo(SV),
2025                         false, false, 0);
2026   }
2027
2028   // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
2029   // We suppose the given va_list is already allocated.
2030   //
2031   // typedef struct {
2032   //  char gpr;     /* index into the array of 8 GPRs
2033   //                 * stored in the register save area
2034   //                 * gpr=0 corresponds to r3,
2035   //                 * gpr=1 to r4, etc.
2036   //                 */
2037   //  char fpr;     /* index into the array of 8 FPRs
2038   //                 * stored in the register save area
2039   //                 * fpr=0 corresponds to f1,
2040   //                 * fpr=1 to f2, etc.
2041   //                 */
2042   //  char *overflow_arg_area;
2043   //                /* location on stack that holds
2044   //                 * the next overflow argument
2045   //                 */
2046   //  char *reg_save_area;
2047   //               /* where r3:r10 and f1:f8 (if saved)
2048   //                * are stored
2049   //                */
2050   // } va_list[1];
2051
2052
2053   SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32);
2054   SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32);
2055
2056
2057   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2058
2059   SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
2060                                             PtrVT);
2061   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
2062                                  PtrVT);
2063
2064   uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
2065   SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
2066
2067   uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
2068   SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);
2069
2070   uint64_t FPROffset = 1;
2071   SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);
2072
2073   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2074
2075   // Store first byte : number of int regs
2076   SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
2077                                          Op.getOperand(1),
2078                                          MachinePointerInfo(SV),
2079                                          MVT::i8, false, false, 0);
2080   uint64_t nextOffset = FPROffset;
2081   SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
2082                                   ConstFPROffset);
2083
2084   // Store second byte : number of float regs
2085   SDValue secondStore =
2086     DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
2087                       MachinePointerInfo(SV, nextOffset), MVT::i8,
2088                       false, false, 0);
2089   nextOffset += StackOffset;
2090   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
2091
2092   // Store second word : arguments given on stack
2093   SDValue thirdStore =
2094     DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
2095                  MachinePointerInfo(SV, nextOffset),
2096                  false, false, 0);
2097   nextOffset += FrameOffset;
2098   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
2099
2100   // Store third word : arguments given in registers
2101   return DAG.getStore(thirdStore, dl, FR, nextPtr,
2102                       MachinePointerInfo(SV, nextOffset),
2103                       false, false, 0);
2104
2105 }
2106
2107 #include "PPCGenCallingConv.inc"
2108
2109 // Function whose sole purpose is to kill compiler warnings
2110 // stemming from unused functions included from PPCGenCallingConv.inc.
2111 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
2112   return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
2113 }
2114
2115 bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
2116                                       CCValAssign::LocInfo &LocInfo,
2117                                       ISD::ArgFlagsTy &ArgFlags,
2118                                       CCState &State) {
2119   return true;
2120 }
2121
2122 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
2123                                              MVT &LocVT,
2124                                              CCValAssign::LocInfo &LocInfo,
2125                                              ISD::ArgFlagsTy &ArgFlags,
2126                                              CCState &State) {
2127   static const MCPhysReg ArgRegs[] = {
2128     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2129     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2130   };
2131   const unsigned NumArgRegs = array_lengthof(ArgRegs);
2132
2133   unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
2134
2135   // Skip one register if the first unallocated register has an even register
2136   // number and there are still argument registers available which have not been
2137   // allocated yet. RegNum is actually an index into ArgRegs, which means we
2138   // need to skip a register if RegNum is odd.
2139   if (RegNum != NumArgRegs && RegNum % 2 == 1) {
2140     State.AllocateReg(ArgRegs[RegNum]);
2141   }
2142
2143   // Always return false here, as this function only makes sure that the first
2144   // unallocated register has an odd register number and does not actually
2145   // allocate a register for the current argument.
2146   return false;
2147 }
2148
2149 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
2150                                                MVT &LocVT,
2151                                                CCValAssign::LocInfo &LocInfo,
2152                                                ISD::ArgFlagsTy &ArgFlags,
2153                                                CCState &State) {
2154   static const MCPhysReg ArgRegs[] = {
2155     PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
2156     PPC::F8
2157   };
2158
2159   const unsigned NumArgRegs = array_lengthof(ArgRegs);
2160
2161   unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
2162
2163   // If there is only one Floating-point register left we need to put both f64
2164   // values of a split ppc_fp128 value on the stack.
2165   if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
2166     State.AllocateReg(ArgRegs[RegNum]);
2167   }
2168
2169   // Always return false here, as this function only makes sure that the two f64
2170   // values a ppc_fp128 value is split into are both passed in registers or both
2171   // passed on the stack and does not actually allocate a register for the
2172   // current argument.
2173   return false;
2174 }
2175
2176 /// GetFPR - Get the set of FP registers that should be allocated for arguments,
2177 /// on Darwin.
2178 static const MCPhysReg *GetFPR() {
2179   static const MCPhysReg FPR[] = {
2180     PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
2181     PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
2182   };
2183
2184   return FPR;
2185 }
2186
2187 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
2188 /// the stack.
2189 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
2190                                        unsigned PtrByteSize) {
2191   unsigned ArgSize = ArgVT.getStoreSize();
2192   if (Flags.isByVal())
2193     ArgSize = Flags.getByValSize();
2194
2195   // Round up to multiples of the pointer size, except for array members,
2196   // which are always packed.
2197   if (!Flags.isInConsecutiveRegs())
2198     ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2199
2200   return ArgSize;
2201 }
2202
2203 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
2204 /// on the stack.
2205 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
2206                                             ISD::ArgFlagsTy Flags,
2207                                             unsigned PtrByteSize) {
2208   unsigned Align = PtrByteSize;
2209
2210   // Altivec parameters are padded to a 16 byte boundary.
2211   if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
2212       ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
2213       ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
2214     Align = 16;
2215
2216   // ByVal parameters are aligned as requested.
2217   if (Flags.isByVal()) {
2218     unsigned BVAlign = Flags.getByValAlign();
2219     if (BVAlign > PtrByteSize) {
2220       if (BVAlign % PtrByteSize != 0)
2221           llvm_unreachable(
2222             "ByVal alignment is not a multiple of the pointer size");
2223
2224       Align = BVAlign;
2225     }
2226   }
2227
2228   // Array members are always packed to their original alignment.
2229   if (Flags.isInConsecutiveRegs()) {
2230     // If the array member was split into multiple registers, the first
2231     // needs to be aligned to the size of the full type.  (Except for
2232     // ppcf128, which is only aligned as its f64 components.)
2233     if (Flags.isSplit() && OrigVT != MVT::ppcf128)
2234       Align = OrigVT.getStoreSize();
2235     else
2236       Align = ArgVT.getStoreSize();
2237   }
2238
2239   return Align;
2240 }
2241
2242 /// CalculateStackSlotUsed - Return whether this argument will use its
2243 /// stack slot (instead of being passed in registers).  ArgOffset,
2244 /// AvailableFPRs, and AvailableVRs must hold the current argument
2245 /// position, and will be updated to account for this argument.
2246 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
2247                                    ISD::ArgFlagsTy Flags,
2248                                    unsigned PtrByteSize,
2249                                    unsigned LinkageSize,
2250                                    unsigned ParamAreaSize,
2251                                    unsigned &ArgOffset,
2252                                    unsigned &AvailableFPRs,
2253                                    unsigned &AvailableVRs) {
2254   bool UseMemory = false;
2255
2256   // Respect alignment of argument on the stack.
2257   unsigned Align =
2258     CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
2259   ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
2260   // If there's no space left in the argument save area, we must
2261   // use memory (this check also catches zero-sized arguments).
2262   if (ArgOffset >= LinkageSize + ParamAreaSize)
2263     UseMemory = true;
2264
2265   // Allocate argument on the stack.
2266   ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
2267   if (Flags.isInConsecutiveRegsLast())
2268     ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2269   // If we overran the argument save area, we must use memory
2270   // (this check catches arguments passed partially in memory)
2271   if (ArgOffset > LinkageSize + ParamAreaSize)
2272     UseMemory = true;
2273
2274   // However, if the argument is actually passed in an FPR or a VR,
2275   // we don't use memory after all.
2276   if (!Flags.isByVal()) {
2277     if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
2278       if (AvailableFPRs > 0) {
2279         --AvailableFPRs;
2280         return false;
2281       }
2282     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
2283         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
2284         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
2285       if (AvailableVRs > 0) {
2286         --AvailableVRs;
2287         return false;
2288       }
2289   }
2290
2291   return UseMemory;
2292 }
2293
2294 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
2295 /// ensure minimum alignment required for target.
2296 static unsigned EnsureStackAlignment(const TargetMachine &Target,
2297                                      unsigned NumBytes) {
2298   unsigned TargetAlign = Target.getFrameLowering()->getStackAlignment();
2299   unsigned AlignMask = TargetAlign - 1;
2300   NumBytes = (NumBytes + AlignMask) & ~AlignMask;
2301   return NumBytes;
2302 }
2303
2304 SDValue
2305 PPCTargetLowering::LowerFormalArguments(SDValue Chain,
2306                                         CallingConv::ID CallConv, bool isVarArg,
2307                                         const SmallVectorImpl<ISD::InputArg>
2308                                           &Ins,
2309                                         SDLoc dl, SelectionDAG &DAG,
2310                                         SmallVectorImpl<SDValue> &InVals)
2311                                           const {
2312   if (Subtarget.isSVR4ABI()) {
2313     if (Subtarget.isPPC64())
2314       return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
2315                                          dl, DAG, InVals);
2316     else
2317       return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
2318                                          dl, DAG, InVals);
2319   } else {
2320     return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
2321                                        dl, DAG, InVals);
2322   }
2323 }
2324
2325 SDValue
2326 PPCTargetLowering::LowerFormalArguments_32SVR4(
2327                                       SDValue Chain,
2328                                       CallingConv::ID CallConv, bool isVarArg,
2329                                       const SmallVectorImpl<ISD::InputArg>
2330                                         &Ins,
2331                                       SDLoc dl, SelectionDAG &DAG,
2332                                       SmallVectorImpl<SDValue> &InVals) const {
2333
2334   // 32-bit SVR4 ABI Stack Frame Layout:
2335   //              +-----------------------------------+
2336   //        +-->  |            Back chain             |
2337   //        |     +-----------------------------------+
2338   //        |     | Floating-point register save area |
2339   //        |     +-----------------------------------+
2340   //        |     |    General register save area     |
2341   //        |     +-----------------------------------+
2342   //        |     |          CR save word             |
2343   //        |     +-----------------------------------+
2344   //        |     |         VRSAVE save word          |
2345   //        |     +-----------------------------------+
2346   //        |     |         Alignment padding         |
2347   //        |     +-----------------------------------+
2348   //        |     |     Vector register save area     |
2349   //        |     +-----------------------------------+
2350   //        |     |       Local variable space        |
2351   //        |     +-----------------------------------+
2352   //        |     |        Parameter list area        |
2353   //        |     +-----------------------------------+
2354   //        |     |           LR save word            |
2355   //        |     +-----------------------------------+
2356   // SP-->  +---  |            Back chain             |
2357   //              +-----------------------------------+
2358   //
2359   // Specifications:
2360   //   System V Application Binary Interface PowerPC Processor Supplement
2361   //   AltiVec Technology Programming Interface Manual
2362
2363   MachineFunction &MF = DAG.getMachineFunction();
2364   MachineFrameInfo *MFI = MF.getFrameInfo();
2365   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2366
2367   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2368   // Potential tail calls could cause overwriting of argument stack slots.
2369   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
2370                        (CallConv == CallingConv::Fast));
2371   unsigned PtrByteSize = 4;
2372
2373   // Assign locations to all of the incoming arguments.
2374   SmallVector<CCValAssign, 16> ArgLocs;
2375   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2376                  getTargetMachine(), ArgLocs, *DAG.getContext());
2377
2378   // Reserve space for the linkage area on the stack.
2379   unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false, false);
2380   CCInfo.AllocateStack(LinkageSize, PtrByteSize);
2381
2382   CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
2383
2384   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2385     CCValAssign &VA = ArgLocs[i];
2386
2387     // Arguments stored in registers.
2388     if (VA.isRegLoc()) {
2389       const TargetRegisterClass *RC;
2390       EVT ValVT = VA.getValVT();
2391
2392       switch (ValVT.getSimpleVT().SimpleTy) {
2393         default:
2394           llvm_unreachable("ValVT not supported by formal arguments Lowering");
2395         case MVT::i1:
2396         case MVT::i32:
2397           RC = &PPC::GPRCRegClass;
2398           break;
2399         case MVT::f32:
2400           RC = &PPC::F4RCRegClass;
2401           break;
2402         case MVT::f64:
2403           if (Subtarget.hasVSX())
2404             RC = &PPC::VSFRCRegClass;
2405           else
2406             RC = &PPC::F8RCRegClass;
2407           break;
2408         case MVT::v16i8:
2409         case MVT::v8i16:
2410         case MVT::v4i32:
2411         case MVT::v4f32:
2412           RC = &PPC::VRRCRegClass;
2413           break;
2414         case MVT::v2f64:
2415         case MVT::v2i64:
2416           RC = &PPC::VSHRCRegClass;
2417           break;
2418       }
2419
2420       // Transform the arguments stored in physical registers into virtual ones.
2421       unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2422       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
2423                                             ValVT == MVT::i1 ? MVT::i32 : ValVT);
2424
2425       if (ValVT == MVT::i1)
2426         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
2427
2428       InVals.push_back(ArgValue);
2429     } else {
2430       // Argument stored in memory.
2431       assert(VA.isMemLoc());
2432
2433       unsigned ArgSize = VA.getLocVT().getStoreSize();
2434       int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
2435                                       isImmutable);
2436
2437       // Create load nodes to retrieve arguments from the stack.
2438       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2439       InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
2440                                    MachinePointerInfo(),
2441                                    false, false, false, 0));
2442     }
2443   }
2444
2445   // Assign locations to all of the incoming aggregate by value arguments.
2446   // Aggregates passed by value are stored in the local variable space of the
2447   // caller's stack frame, right above the parameter list area.
2448   SmallVector<CCValAssign, 16> ByValArgLocs;
2449   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2450                       getTargetMachine(), ByValArgLocs, *DAG.getContext());
2451
2452   // Reserve stack space for the allocations in CCInfo.
2453   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
2454
2455   CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
2456
2457   // Area that is at least reserved in the caller of this function.
2458   unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
2459   MinReservedArea = std::max(MinReservedArea, LinkageSize);
2460
2461   // Set the size that is at least reserved in caller of this function.  Tail
2462   // call optimized function's reserved stack space needs to be aligned so that
2463   // taking the difference between two stack areas will result in an aligned
2464   // stack.
2465   MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
2466   FuncInfo->setMinReservedArea(MinReservedArea);
2467
2468   SmallVector<SDValue, 8> MemOps;
2469
2470   // If the function takes variable number of arguments, make a frame index for
2471   // the start of the first vararg value... for expansion of llvm.va_start.
2472   if (isVarArg) {
2473     static const MCPhysReg GPArgRegs[] = {
2474       PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2475       PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2476     };
2477     const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
2478
2479     static const MCPhysReg FPArgRegs[] = {
2480       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
2481       PPC::F8
2482     };
2483     const unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
2484
2485     FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
2486                                                           NumGPArgRegs));
2487     FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs,
2488                                                           NumFPArgRegs));
2489
2490     // Make room for NumGPArgRegs and NumFPArgRegs.
2491     int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
2492                 NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8;
2493
2494     FuncInfo->setVarArgsStackOffset(
2495       MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
2496                              CCInfo.getNextStackOffset(), true));
2497
2498     FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
2499     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2500
2501     // The fixed integer arguments of a variadic function are stored to the
2502     // VarArgsFrameIndex on the stack so that they may be loaded by deferencing
2503     // the result of va_next.
2504     for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
2505       // Get an existing live-in vreg, or add a new one.
2506       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
2507       if (!VReg)
2508         VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
2509
2510       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2511       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2512                                    MachinePointerInfo(), false, false, 0);
2513       MemOps.push_back(Store);
2514       // Increment the address by four for the next argument to store
2515       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
2516       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
2517     }
2518
2519     // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
2520     // is set.
2521     // The double arguments are stored to the VarArgsFrameIndex
2522     // on the stack.
2523     for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
2524       // Get an existing live-in vreg, or add a new one.
2525       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
2526       if (!VReg)
2527         VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
2528
2529       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
2530       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2531                                    MachinePointerInfo(), false, false, 0);
2532       MemOps.push_back(Store);
2533       // Increment the address by eight for the next argument to store
2534       SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
2535                                          PtrVT);
2536       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
2537     }
2538   }
2539
2540   if (!MemOps.empty())
2541     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
2542
2543   return Chain;
2544 }
2545
2546 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
2547 // value to MVT::i64 and then truncate to the correct register size.
2548 SDValue
2549 PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
2550                                      SelectionDAG &DAG, SDValue ArgVal,
2551                                      SDLoc dl) const {
2552   if (Flags.isSExt())
2553     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
2554                          DAG.getValueType(ObjectVT));
2555   else if (Flags.isZExt())
2556     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
2557                          DAG.getValueType(ObjectVT));
2558
2559   return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
2560 }
2561
2562 SDValue
2563 PPCTargetLowering::LowerFormalArguments_64SVR4(
2564                                       SDValue Chain,
2565                                       CallingConv::ID CallConv, bool isVarArg,
2566                                       const SmallVectorImpl<ISD::InputArg>
2567                                         &Ins,
2568                                       SDLoc dl, SelectionDAG &DAG,
2569                                       SmallVectorImpl<SDValue> &InVals) const {
2570   // TODO: add description of PPC stack frame format, or at least some docs.
2571   //
2572   bool isELFv2ABI = Subtarget.isELFv2ABI();
2573   bool isLittleEndian = Subtarget.isLittleEndian();
2574   MachineFunction &MF = DAG.getMachineFunction();
2575   MachineFrameInfo *MFI = MF.getFrameInfo();
2576   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2577
2578   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2579   // Potential tail calls could cause overwriting of argument stack slots.
2580   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
2581                        (CallConv == CallingConv::Fast));
2582   unsigned PtrByteSize = 8;
2583
2584   unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
2585                                                           isELFv2ABI);
2586
2587   static const MCPhysReg GPR[] = {
2588     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
2589     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
2590   };
2591
2592   static const MCPhysReg *FPR = GetFPR();
2593
2594   static const MCPhysReg VR[] = {
2595     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
2596     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
2597   };
2598   static const MCPhysReg VSRH[] = {
2599     PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
2600     PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
2601   };
2602
2603   const unsigned Num_GPR_Regs = array_lengthof(GPR);
2604   const unsigned Num_FPR_Regs = 13;
2605   const unsigned Num_VR_Regs  = array_lengthof(VR);
2606
2607   // Do a first pass over the arguments to determine whether the ABI
2608   // guarantees that our caller has allocated the parameter save area
2609   // on its stack frame.  In the ELFv1 ABI, this is always the case;
2610   // in the ELFv2 ABI, it is true if this is a vararg function or if
2611   // any parameter is located in a stack slot.
2612
2613   bool HasParameterArea = !isELFv2ABI || isVarArg;
2614   unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
2615   unsigned NumBytes = LinkageSize;
2616   unsigned AvailableFPRs = Num_FPR_Regs;
2617   unsigned AvailableVRs = Num_VR_Regs;
2618   for (unsigned i = 0, e = Ins.size(); i != e; ++i)
2619     if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
2620                                PtrByteSize, LinkageSize, ParamAreaSize,
2621                                NumBytes, AvailableFPRs, AvailableVRs))
2622       HasParameterArea = true;
2623
2624   // Add DAG nodes to load the arguments or copy them out of registers.  On
2625   // entry to a function on PPC, the arguments start after the linkage area,
2626   // although the first ones are often in registers.
2627
2628   unsigned ArgOffset = LinkageSize;
2629   unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
2630   SmallVector<SDValue, 8> MemOps;
2631   Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
2632   unsigned CurArgIdx = 0;
2633   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
2634     SDValue ArgVal;
2635     bool needsLoad = false;
2636     EVT ObjectVT = Ins[ArgNo].VT;
2637     EVT OrigVT = Ins[ArgNo].ArgVT;
2638     unsigned ObjSize = ObjectVT.getStoreSize();
2639     unsigned ArgSize = ObjSize;
2640     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
2641     std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
2642     CurArgIdx = Ins[ArgNo].OrigArgIndex;
2643
2644     /* Respect alignment of argument on the stack.  */
2645     unsigned Align =
2646       CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
2647     ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
2648     unsigned CurArgOffset = ArgOffset;
2649
2650     /* Compute GPR index associated with argument offset.  */
2651     GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
2652     GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
2653
2654     // FIXME the codegen can be much improved in some cases.
2655     // We do not have to keep everything in memory.
2656     if (Flags.isByVal()) {
2657       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
2658       ObjSize = Flags.getByValSize();
2659       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2660       // Empty aggregate parameters do not take up registers.  Examples:
2661       //   struct { } a;
2662       //   union  { } b;
2663       //   int c[0];
2664       // etc.  However, we have to provide a place-holder in InVals, so
2665       // pretend we have an 8-byte item at the current address for that
2666       // purpose.
2667       if (!ObjSize) {
2668         int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
2669         SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2670         InVals.push_back(FIN);
2671         continue;
2672       }
2673
2674       // Create a stack object covering all stack doublewords occupied
2675       // by the argument.  If the argument is (fully or partially) on
2676       // the stack, or if the argument is fully in registers but the
2677       // caller has allocated the parameter save anyway, we can refer
2678       // directly to the caller's stack frame.  Otherwise, create a
2679       // local copy in our own frame.
2680       int FI;
2681       if (HasParameterArea ||
2682           ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
2683         FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false);
2684       else
2685         FI = MFI->CreateStackObject(ArgSize, Align, false);
2686       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2687
2688       // Handle aggregates smaller than 8 bytes.
2689       if (ObjSize < PtrByteSize) {
2690         // The value of the object is its address, which differs from the
2691         // address of the enclosing doubleword on big-endian systems.
2692         SDValue Arg = FIN;
2693         if (!isLittleEndian) {
2694           SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, PtrVT);
2695           Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
2696         }
2697         InVals.push_back(Arg);
2698
2699         if (GPR_idx != Num_GPR_Regs) {
2700           unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2701           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2702           SDValue Store;
2703
2704           if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
2705             EVT ObjType = (ObjSize == 1 ? MVT::i8 :
2706                            (ObjSize == 2 ? MVT::i16 : MVT::i32));
2707             Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
2708                                       MachinePointerInfo(FuncArg),
2709                                       ObjType, false, false, 0);
2710           } else {
2711             // For sizes that don't fit a truncating store (3, 5, 6, 7),
2712             // store the whole register as-is to the parameter save area
2713             // slot.
2714             Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2715                                  MachinePointerInfo(FuncArg),
2716                                  false, false, 0);
2717           }
2718
2719           MemOps.push_back(Store);
2720         }
2721         // Whether we copied from a register or not, advance the offset
2722         // into the parameter save area by a full doubleword.
2723         ArgOffset += PtrByteSize;
2724         continue;
2725       }
2726
2727       // The value of the object is its address, which is the address of
2728       // its first stack doubleword.
2729       InVals.push_back(FIN);
2730
2731       // Store whatever pieces of the object are in registers to memory.
2732       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
2733         if (GPR_idx == Num_GPR_Regs)
2734           break;
2735
2736         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2737         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2738         SDValue Addr = FIN;
2739         if (j) {
2740           SDValue Off = DAG.getConstant(j, PtrVT);
2741           Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
2742         }
2743         SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
2744                                      MachinePointerInfo(FuncArg, j),
2745                                      false, false, 0);
2746         MemOps.push_back(Store);
2747         ++GPR_idx;
2748       }
2749       ArgOffset += ArgSize;
2750       continue;
2751     }
2752
2753     switch (ObjectVT.getSimpleVT().SimpleTy) {
2754     default: llvm_unreachable("Unhandled argument type!");
2755     case MVT::i1:
2756     case MVT::i32:
2757     case MVT::i64:
2758       // These can be scalar arguments or elements of an integer array type
2759       // passed directly.  Clang may use those instead of "byval" aggregate
2760       // types to avoid forcing arguments to memory unnecessarily.
2761       if (GPR_idx != Num_GPR_Regs) {
2762         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2763         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
2764
2765         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
2766           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
2767           // value to MVT::i64 and then truncate to the correct register size.
2768           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
2769       } else {
2770         needsLoad = true;
2771         ArgSize = PtrByteSize;
2772       }
2773       ArgOffset += 8;
2774       break;
2775
2776     case MVT::f32:
2777     case MVT::f64:
2778       // These can be scalar arguments or elements of a float array type
2779       // passed directly.  The latter are used to implement ELFv2 homogenous
2780       // float aggregates.
2781       if (FPR_idx != Num_FPR_Regs) {
2782         unsigned VReg;
2783
2784         if (ObjectVT == MVT::f32)
2785           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
2786         else
2787           VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ?
2788                                             &PPC::VSFRCRegClass :
2789                                             &PPC::F8RCRegClass);
2790
2791         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
2792         ++FPR_idx;
2793       } else if (GPR_idx != Num_GPR_Regs) {
2794         // This can only ever happen in the presence of f32 array types,
2795         // since otherwise we never run out of FPRs before running out
2796         // of GPRs.
2797         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2798         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
2799
2800         if (ObjectVT == MVT::f32) {
2801           if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
2802             ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
2803                                  DAG.getConstant(32, MVT::i32));
2804           ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
2805         }
2806
2807         ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
2808       } else {
2809         needsLoad = true;
2810       }
2811
2812       // When passing an array of floats, the array occupies consecutive
2813       // space in the argument area; only round up to the next doubleword
2814       // at the end of the array.  Otherwise, each float takes 8 bytes.
2815       ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
2816       ArgOffset += ArgSize;
2817       if (Flags.isInConsecutiveRegsLast())
2818         ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2819       break;
2820     case MVT::v4f32:
2821     case MVT::v4i32:
2822     case MVT::v8i16:
2823     case MVT::v16i8:
2824     case MVT::v2f64:
2825     case MVT::v2i64:
2826       // These can be scalar arguments or elements of a vector array type
2827       // passed directly.  The latter are used to implement ELFv2 homogenous
2828       // vector aggregates.
2829       if (VR_idx != Num_VR_Regs) {
2830         unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
2831                         MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
2832                         MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
2833         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
2834         ++VR_idx;
2835       } else {
2836         needsLoad = true;
2837       }
2838       ArgOffset += 16;
2839       break;
2840     }
2841
2842     // We need to load the argument to a virtual register if we determined
2843     // above that we ran out of physical registers of the appropriate type.
2844     if (needsLoad) {
2845       if (ObjSize < ArgSize && !isLittleEndian)
2846         CurArgOffset += ArgSize - ObjSize;
2847       int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
2848       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2849       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
2850                            false, false, false, 0);
2851     }
2852
2853     InVals.push_back(ArgVal);
2854   }
2855
2856   // Area that is at least reserved in the caller of this function.
2857   unsigned MinReservedArea;
2858   if (HasParameterArea)
2859     MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
2860   else
2861     MinReservedArea = LinkageSize;
2862
2863   // Set the size that is at least reserved in caller of this function.  Tail
2864   // call optimized functions' reserved stack space needs to be aligned so that
2865   // taking the difference between two stack areas will result in an aligned
2866   // stack.
2867   MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
2868   FuncInfo->setMinReservedArea(MinReservedArea);
2869
2870   // If the function takes variable number of arguments, make a frame index for
2871   // the start of the first vararg value... for expansion of llvm.va_start.
2872   if (isVarArg) {
2873     int Depth = ArgOffset;
2874
2875     FuncInfo->setVarArgsFrameIndex(
2876       MFI->CreateFixedObject(PtrByteSize, Depth, true));
2877     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2878
2879     // If this function is vararg, store any remaining integer argument regs
2880     // to their spots on the stack so that they may be loaded by deferencing the
2881     // result of va_next.
2882     for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
2883          GPR_idx < Num_GPR_Regs; ++GPR_idx) {
2884       unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
2885       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2886       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2887                                    MachinePointerInfo(), false, false, 0);
2888       MemOps.push_back(Store);
2889       // Increment the address by four for the next argument to store
2890       SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT);
2891       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
2892     }
2893   }
2894
2895   if (!MemOps.empty())
2896     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
2897
2898   return Chain;
2899 }
2900
2901 SDValue
2902 PPCTargetLowering::LowerFormalArguments_Darwin(
2903                                       SDValue Chain,
2904                                       CallingConv::ID CallConv, bool isVarArg,
2905                                       const SmallVectorImpl<ISD::InputArg>
2906                                         &Ins,
2907                                       SDLoc dl, SelectionDAG &DAG,
2908                                       SmallVectorImpl<SDValue> &InVals) const {
2909   // TODO: add description of PPC stack frame format, or at least some docs.
2910   //
2911   MachineFunction &MF = DAG.getMachineFunction();
2912   MachineFrameInfo *MFI = MF.getFrameInfo();
2913   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2914
2915   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2916   bool isPPC64 = PtrVT == MVT::i64;
2917   // Potential tail calls could cause overwriting of argument stack slots.
2918   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
2919                        (CallConv == CallingConv::Fast));
2920   unsigned PtrByteSize = isPPC64 ? 8 : 4;
2921
2922   unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
2923                                                           false);
2924   unsigned ArgOffset = LinkageSize;
2925   // Area that is at least reserved in caller of this function.
2926   unsigned MinReservedArea = ArgOffset;
2927
2928   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
2929     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2930     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2931   };
2932   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
2933     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
2934     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
2935   };
2936
2937   static const MCPhysReg *FPR = GetFPR();
2938
2939   static const MCPhysReg VR[] = {
2940     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
2941     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
2942   };
2943
2944   const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
2945   const unsigned Num_FPR_Regs = 13;
2946   const unsigned Num_VR_Regs  = array_lengthof( VR);
2947
2948   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
2949
2950   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
2951
2952   // In 32-bit non-varargs functions, the stack space for vectors is after the
2953   // stack space for non-vectors.  We do not use this space unless we have
2954   // too many vectors to fit in registers, something that only occurs in
2955   // constructed examples:), but we have to walk the arglist to figure
2956   // that out...for the pathological case, compute VecArgOffset as the
2957   // start of the vector parameter area.  Computing VecArgOffset is the
2958   // entire point of the following loop.
2959   unsigned VecArgOffset = ArgOffset;
2960   if (!isVarArg && !isPPC64) {
2961     for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
2962          ++ArgNo) {
2963       EVT ObjectVT = Ins[ArgNo].VT;
2964       ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
2965
2966       if (Flags.isByVal()) {
2967         // ObjSize is the true size, ArgSize rounded up to multiple of regs.
2968         unsigned ObjSize = Flags.getByValSize();
2969         unsigned ArgSize =
2970                 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2971         VecArgOffset += ArgSize;
2972         continue;
2973       }
2974
2975       switch(ObjectVT.getSimpleVT().SimpleTy) {
2976       default: llvm_unreachable("Unhandled argument type!");
2977       case MVT::i1:
2978       case MVT::i32:
2979       case MVT::f32:
2980         VecArgOffset += 4;
2981         break;
2982       case MVT::i64:  // PPC64
2983       case MVT::f64:
2984         // FIXME: We are guaranteed to be !isPPC64 at this point.
2985         // Does MVT::i64 apply?
2986         VecArgOffset += 8;
2987         break;
2988       case MVT::v4f32:
2989       case MVT::v4i32:
2990       case MVT::v8i16:
2991       case MVT::v16i8:
2992         // Nothing to do, we're only looking at Nonvector args here.
2993         break;
2994       }
2995     }
2996   }
2997   // We've found where the vector parameter area in memory is.  Skip the
2998   // first 12 parameters; these don't use that memory.
2999   VecArgOffset = ((VecArgOffset+15)/16)*16;
3000   VecArgOffset += 12*16;
3001
3002   // Add DAG nodes to load the arguments or copy them out of registers.  On
3003   // entry to a function on PPC, the arguments start after the linkage area,
3004   // although the first ones are often in registers.
3005
3006   SmallVector<SDValue, 8> MemOps;
3007   unsigned nAltivecParamsAtEnd = 0;
3008   Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
3009   unsigned CurArgIdx = 0;
3010   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3011     SDValue ArgVal;
3012     bool needsLoad = false;
3013     EVT ObjectVT = Ins[ArgNo].VT;
3014     unsigned ObjSize = ObjectVT.getSizeInBits()/8;
3015     unsigned ArgSize = ObjSize;
3016     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3017     std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
3018     CurArgIdx = Ins[ArgNo].OrigArgIndex;
3019
3020     unsigned CurArgOffset = ArgOffset;
3021
3022     // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
3023     if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
3024         ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
3025       if (isVarArg || isPPC64) {
3026         MinReservedArea = ((MinReservedArea+15)/16)*16;
3027         MinReservedArea += CalculateStackSlotSize(ObjectVT,
3028                                                   Flags,
3029                                                   PtrByteSize);
3030       } else  nAltivecParamsAtEnd++;
3031     } else
3032       // Calculate min reserved area.
3033       MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
3034                                                 Flags,
3035                                                 PtrByteSize);
3036
3037     // FIXME the codegen can be much improved in some cases.
3038     // We do not have to keep everything in memory.
3039     if (Flags.isByVal()) {
3040       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
3041       ObjSize = Flags.getByValSize();
3042       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3043       // Objects of size 1 and 2 are right justified, everything else is
3044       // left justified.  This means the memory address is adjusted forwards.
3045       if (ObjSize==1 || ObjSize==2) {
3046         CurArgOffset = CurArgOffset + (4 - ObjSize);
3047       }
3048       // The value of the object is its address.
3049       int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
3050       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3051       InVals.push_back(FIN);
3052       if (ObjSize==1 || ObjSize==2) {
3053         if (GPR_idx != Num_GPR_Regs) {
3054           unsigned VReg;
3055           if (isPPC64)
3056             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3057           else
3058             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3059           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3060           EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
3061           SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
3062                                             MachinePointerInfo(FuncArg),
3063                                             ObjType, false, false, 0);
3064           MemOps.push_back(Store);
3065           ++GPR_idx;
3066         }
3067
3068         ArgOffset += PtrByteSize;
3069
3070         continue;
3071       }
3072       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3073         // Store whatever pieces of the object are in registers
3074         // to memory.  ArgOffset will be the address of the beginning
3075         // of the object.
3076         if (GPR_idx != Num_GPR_Regs) {
3077           unsigned VReg;
3078           if (isPPC64)
3079             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3080           else
3081             VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3082           int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
3083           SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3084           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3085           SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3086                                        MachinePointerInfo(FuncArg, j),
3087                                        false, false, 0);
3088           MemOps.push_back(Store);
3089           ++GPR_idx;
3090           ArgOffset += PtrByteSize;
3091         } else {
3092           ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
3093           break;
3094         }
3095       }
3096       continue;
3097     }
3098
3099     switch (ObjectVT.getSimpleVT().SimpleTy) {
3100     default: llvm_unreachable("Unhandled argument type!");
3101     case MVT::i1:
3102     case MVT::i32:
3103       if (!isPPC64) {
3104         if (GPR_idx != Num_GPR_Regs) {
3105           unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3106           ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3107
3108           if (ObjectVT == MVT::i1)
3109             ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
3110
3111           ++GPR_idx;
3112         } else {
3113           needsLoad = true;
3114           ArgSize = PtrByteSize;
3115         }
3116         // All int arguments reserve stack space in the Darwin ABI.
3117         ArgOffset += PtrByteSize;
3118         break;
3119       }
3120       // FALLTHROUGH
3121     case MVT::i64:  // PPC64
3122       if (GPR_idx != Num_GPR_Regs) {
3123         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3124         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3125
3126         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3127           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3128           // value to MVT::i64 and then truncate to the correct register size.
3129           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3130
3131         ++GPR_idx;
3132       } else {
3133         needsLoad = true;
3134         ArgSize = PtrByteSize;
3135       }
3136       // All int arguments reserve stack space in the Darwin ABI.
3137       ArgOffset += 8;
3138       break;
3139
3140     case MVT::f32:
3141     case MVT::f64:
3142       // Every 4 bytes of argument space consumes one of the GPRs available for
3143       // argument passing.
3144       if (GPR_idx != Num_GPR_Regs) {
3145         ++GPR_idx;
3146         if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
3147           ++GPR_idx;
3148       }
3149       if (FPR_idx != Num_FPR_Regs) {
3150         unsigned VReg;
3151
3152         if (ObjectVT == MVT::f32)
3153           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
3154         else
3155           VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
3156
3157         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3158         ++FPR_idx;
3159       } else {
3160         needsLoad = true;
3161       }
3162
3163       // All FP arguments reserve stack space in the Darwin ABI.
3164       ArgOffset += isPPC64 ? 8 : ObjSize;
3165       break;
3166     case MVT::v4f32:
3167     case MVT::v4i32:
3168     case MVT::v8i16:
3169     case MVT::v16i8:
3170       // Note that vector arguments in registers don't reserve stack space,
3171       // except in varargs functions.
3172       if (VR_idx != Num_VR_Regs) {
3173         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3174         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3175         if (isVarArg) {
3176           while ((ArgOffset % 16) != 0) {
3177             ArgOffset += PtrByteSize;
3178             if (GPR_idx != Num_GPR_Regs)
3179               GPR_idx++;
3180           }
3181           ArgOffset += 16;
3182           GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
3183         }
3184         ++VR_idx;
3185       } else {
3186         if (!isVarArg && !isPPC64) {
3187           // Vectors go after all the nonvectors.
3188           CurArgOffset = VecArgOffset;
3189           VecArgOffset += 16;
3190         } else {
3191           // Vectors are aligned.
3192           ArgOffset = ((ArgOffset+15)/16)*16;
3193           CurArgOffset = ArgOffset;
3194           ArgOffset += 16;
3195         }
3196         needsLoad = true;
3197       }
3198       break;
3199     }
3200
3201     // We need to load the argument to a virtual register if we determined above
3202     // that we ran out of physical registers of the appropriate type.
3203     if (needsLoad) {
3204       int FI = MFI->CreateFixedObject(ObjSize,
3205                                       CurArgOffset + (ArgSize - ObjSize),
3206                                       isImmutable);
3207       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3208       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
3209                            false, false, false, 0);
3210     }
3211
3212     InVals.push_back(ArgVal);
3213   }
3214
3215   // Allow for Altivec parameters at the end, if needed.
3216   if (nAltivecParamsAtEnd) {
3217     MinReservedArea = ((MinReservedArea+15)/16)*16;
3218     MinReservedArea += 16*nAltivecParamsAtEnd;
3219   }
3220
3221   // Area that is at least reserved in the caller of this function.
3222   MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
3223
3224   // Set the size that is at least reserved in caller of this function.  Tail
3225   // call optimized functions' reserved stack space needs to be aligned so that
3226   // taking the difference between two stack areas will result in an aligned
3227   // stack.
3228   MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
3229   FuncInfo->setMinReservedArea(MinReservedArea);
3230
3231   // If the function takes variable number of arguments, make a frame index for
3232   // the start of the first vararg value... for expansion of llvm.va_start.
3233   if (isVarArg) {
3234     int Depth = ArgOffset;
3235
3236     FuncInfo->setVarArgsFrameIndex(
3237       MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
3238                              Depth, true));
3239     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3240
3241     // If this function is vararg, store any remaining integer argument regs
3242     // to their spots on the stack so that they may be loaded by deferencing the
3243     // result of va_next.
3244     for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
3245       unsigned VReg;
3246
3247       if (isPPC64)
3248         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3249       else
3250         VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3251
3252       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3253       SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3254                                    MachinePointerInfo(), false, false, 0);
3255       MemOps.push_back(Store);
3256       // Increment the address by four for the next argument to store
3257       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
3258       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3259     }
3260   }
3261
3262   if (!MemOps.empty())
3263     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3264
3265   return Chain;
3266 }
3267
3268 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
3269 /// adjusted to accommodate the arguments for the tailcall.
3270 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
3271                                    unsigned ParamSize) {
3272
3273   if (!isTailCall) return 0;
3274
3275   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3276   unsigned CallerMinReservedArea = FI->getMinReservedArea();
3277   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
3278   // Remember only if the new adjustement is bigger.
3279   if (SPDiff < FI->getTailCallSPDelta())
3280     FI->setTailCallSPDelta(SPDiff);
3281
3282   return SPDiff;
3283 }
3284
3285 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
3286 /// for tail call optimization. Targets which want to do tail call
3287 /// optimization should implement this function.
3288 bool
3289 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
3290                                                      CallingConv::ID CalleeCC,
3291                                                      bool isVarArg,
3292                                       const SmallVectorImpl<ISD::InputArg> &Ins,
3293                                                      SelectionDAG& DAG) const {
3294   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
3295     return false;
3296
3297   // Variable argument functions are not supported.
3298   if (isVarArg)
3299     return false;
3300
3301   MachineFunction &MF = DAG.getMachineFunction();
3302   CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
3303   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
3304     // Functions containing by val parameters are not supported.
3305     for (unsigned i = 0; i != Ins.size(); i++) {
3306        ISD::ArgFlagsTy Flags = Ins[i].Flags;
3307        if (Flags.isByVal()) return false;
3308     }
3309
3310     // Non-PIC/GOT tail calls are supported.
3311     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
3312       return true;
3313
3314     // At the moment we can only do local tail calls (in same module, hidden
3315     // or protected) if we are generating PIC.
3316     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
3317       return G->getGlobal()->hasHiddenVisibility()
3318           || G->getGlobal()->hasProtectedVisibility();
3319   }
3320
3321   return false;
3322 }
3323
3324 /// isCallCompatibleAddress - Return the immediate to use if the specified
3325 /// 32-bit value is representable in the immediate field of a BxA instruction.
3326 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
3327   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
3328   if (!C) return nullptr;
3329
3330   int Addr = C->getZExtValue();
3331   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
3332       SignExtend32<26>(Addr) != Addr)
3333     return nullptr;  // Top 6 bits have to be sext of immediate.
3334
3335   return DAG.getConstant((int)C->getZExtValue() >> 2,
3336                          DAG.getTargetLoweringInfo().getPointerTy()).getNode();
3337 }
3338
3339 namespace {
3340
3341 struct TailCallArgumentInfo {
3342   SDValue Arg;
3343   SDValue FrameIdxOp;
3344   int       FrameIdx;
3345
3346   TailCallArgumentInfo() : FrameIdx(0) {}
3347 };
3348
3349 }
3350
3351 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
3352 static void
3353 StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
3354                                            SDValue Chain,
3355                    const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
3356                    SmallVectorImpl<SDValue> &MemOpChains,
3357                    SDLoc dl) {
3358   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
3359     SDValue Arg = TailCallArgs[i].Arg;
3360     SDValue FIN = TailCallArgs[i].FrameIdxOp;
3361     int FI = TailCallArgs[i].FrameIdx;
3362     // Store relative to framepointer.
3363     MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
3364                                        MachinePointerInfo::getFixedStack(FI),
3365                                        false, false, 0));
3366   }
3367 }
3368
3369 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
3370 /// the appropriate stack slot for the tail call optimized function call.
3371 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
3372                                                MachineFunction &MF,
3373                                                SDValue Chain,
3374                                                SDValue OldRetAddr,
3375                                                SDValue OldFP,
3376                                                int SPDiff,
3377                                                bool isPPC64,
3378                                                bool isDarwinABI,
3379                                                SDLoc dl) {
3380   if (SPDiff) {
3381     // Calculate the new stack slot for the return address.
3382     int SlotSize = isPPC64 ? 8 : 4;
3383     int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64,
3384                                                                    isDarwinABI);
3385     int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
3386                                                           NewRetAddrLoc, true);
3387     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
3388     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
3389     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
3390                          MachinePointerInfo::getFixedStack(NewRetAddr),
3391                          false, false, 0);
3392
3393     // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
3394     // slot as the FP is never overwritten.
3395     if (isDarwinABI) {
3396       int NewFPLoc =
3397         SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
3398       int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
3399                                                           true);
3400       SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
3401       Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
3402                            MachinePointerInfo::getFixedStack(NewFPIdx),
3403                            false, false, 0);
3404     }
3405   }
3406   return Chain;
3407 }
3408
3409 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
3410 /// the position of the argument.
3411 static void
3412 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
3413                          SDValue Arg, int SPDiff, unsigned ArgOffset,
3414                      SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
3415   int Offset = ArgOffset + SPDiff;
3416   uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
3417   int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
3418   EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
3419   SDValue FIN = DAG.getFrameIndex(FI, VT);
3420   TailCallArgumentInfo Info;
3421   Info.Arg = Arg;
3422   Info.FrameIdxOp = FIN;
3423   Info.FrameIdx = FI;
3424   TailCallArguments.push_back(Info);
3425 }
3426
3427 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
3428 /// stack slot. Returns the chain as result and the loaded frame pointers in
3429 /// LROpOut/FPOpout. Used when tail calling.
3430 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
3431                                                         int SPDiff,
3432                                                         SDValue Chain,
3433                                                         SDValue &LROpOut,
3434                                                         SDValue &FPOpOut,
3435                                                         bool isDarwinABI,
3436                                                         SDLoc dl) const {
3437   if (SPDiff) {
3438     // Load the LR and FP stack slot for later adjusting.
3439     EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
3440     LROpOut = getReturnAddrFrameIndex(DAG);
3441     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
3442                           false, false, false, 0);
3443     Chain = SDValue(LROpOut.getNode(), 1);
3444
3445     // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
3446     // slot as the FP is never overwritten.
3447     if (isDarwinABI) {
3448       FPOpOut = getFramePointerFrameIndex(DAG);
3449       FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
3450                             false, false, false, 0);
3451       Chain = SDValue(FPOpOut.getNode(), 1);
3452     }
3453   }
3454   return Chain;
3455 }
3456
3457 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
3458 /// by "Src" to address "Dst" of size "Size".  Alignment information is
3459 /// specified by the specific parameter attribute. The copy will be passed as
3460 /// a byval function parameter.
3461 /// Sometimes what we are copying is the end of a larger object, the part that
3462 /// does not fit in registers.
3463 static SDValue
3464 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
3465                           ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
3466                           SDLoc dl) {
3467   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
3468   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
3469                        false, false, MachinePointerInfo(),
3470                        MachinePointerInfo());
3471 }
3472
3473 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
3474 /// tail calls.
3475 static void
3476 LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
3477                  SDValue Arg, SDValue PtrOff, int SPDiff,
3478                  unsigned ArgOffset, bool isPPC64, bool isTailCall,
3479                  bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
3480                  SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
3481                  SDLoc dl) {
3482   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3483   if (!isTailCall) {
3484     if (isVector) {
3485       SDValue StackPtr;
3486       if (isPPC64)
3487         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
3488       else
3489         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
3490       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
3491                            DAG.getConstant(ArgOffset, PtrVT));
3492     }
3493     MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
3494                                        MachinePointerInfo(), false, false, 0));
3495   // Calculate and remember argument location.
3496   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
3497                                   TailCallArguments);
3498 }
3499
3500 static
3501 void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
3502                      SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
3503                      SDValue LROp, SDValue FPOp, bool isDarwinABI,
3504                      SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
3505   MachineFunction &MF = DAG.getMachineFunction();
3506
3507   // Emit a sequence of copyto/copyfrom virtual registers for arguments that
3508   // might overwrite each other in case of tail call optimization.
3509   SmallVector<SDValue, 8> MemOpChains2;
3510   // Do not flag preceding copytoreg stuff together with the following stuff.
3511   InFlag = SDValue();
3512   StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
3513                                     MemOpChains2, dl);
3514   if (!MemOpChains2.empty())
3515     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
3516
3517   // Store the return address to the appropriate stack slot.
3518   Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
3519                                         isPPC64, isDarwinABI, dl);
3520
3521   // Emit callseq_end just before tailcall node.
3522   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
3523                              DAG.getIntPtrConstant(0, true), InFlag, dl);
3524   InFlag = Chain.getValue(1);
3525 }
3526
3527 static
3528 unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
3529                      SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
3530                      SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
3531                      SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
3532                      const PPCSubtarget &Subtarget) {
3533
3534   bool isPPC64 = Subtarget.isPPC64();
3535   bool isSVR4ABI = Subtarget.isSVR4ABI();
3536   bool isELFv2ABI = Subtarget.isELFv2ABI();
3537
3538   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3539   NodeTys.push_back(MVT::Other);   // Returns a chain
3540   NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.
3541
3542   unsigned CallOpc = PPCISD::CALL;
3543
3544   bool needIndirectCall = true;
3545   if (!isSVR4ABI || !isPPC64)
3546     if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
3547       // If this is an absolute destination address, use the munged value.
3548       Callee = SDValue(Dest, 0);
3549       needIndirectCall = false;
3550     }
3551
3552   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3553     // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
3554     // Use indirect calls for ALL functions calls in JIT mode, since the
3555     // far-call stubs may be outside relocation limits for a BL instruction.
3556     if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
3557       unsigned OpFlags = 0;
3558       if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
3559           (Subtarget.getTargetTriple().isMacOSX() &&
3560            Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
3561           (G->getGlobal()->isDeclaration() ||
3562            G->getGlobal()->isWeakForLinker())) ||
3563           (Subtarget.isTargetELF() && !isPPC64 &&
3564            !G->getGlobal()->hasLocalLinkage() &&
3565            DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
3566         // PC-relative references to external symbols should go through $stub,
3567         // unless we're building with the leopard linker or later, which
3568         // automatically synthesizes these stubs.
3569         OpFlags = PPCII::MO_PLT_OR_STUB;
3570       }
3571
3572       // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
3573       // every direct call is) turn it into a TargetGlobalAddress /
3574       // TargetExternalSymbol node so that legalize doesn't hack it.
3575       Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
3576                                           Callee.getValueType(),
3577                                           0, OpFlags);
3578       needIndirectCall = false;
3579     }
3580   }
3581
3582   if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3583     unsigned char OpFlags = 0;
3584
3585     if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
3586          (Subtarget.getTargetTriple().isMacOSX() &&
3587           Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) ||
3588         (Subtarget.isTargetELF() && !isPPC64 &&
3589          DAG.getTarget().getRelocationModel() == Reloc::PIC_)   ) {
3590       // PC-relative references to external symbols should go through $stub,
3591       // unless we're building with the leopard linker or later, which
3592       // automatically synthesizes these stubs.
3593       OpFlags = PPCII::MO_PLT_OR_STUB;
3594     }
3595
3596     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
3597                                          OpFlags);
3598     needIndirectCall = false;
3599   }
3600
3601   if (needIndirectCall) {
3602     // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
3603     // to do the call, we can't use PPCISD::CALL.
3604     SDValue MTCTROps[] = {Chain, Callee, InFlag};
3605
3606     if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
3607       // Function pointers in the 64-bit SVR4 ABI do not point to the function
3608       // entry point, but to the function descriptor (the function entry point
3609       // address is part of the function descriptor though).
3610       // The function descriptor is a three doubleword structure with the
3611       // following fields: function entry point, TOC base address and
3612       // environment pointer.
3613       // Thus for a call through a function pointer, the following actions need
3614       // to be performed:
3615       //   1. Save the TOC of the caller in the TOC save area of its stack
3616       //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
3617       //   2. Load the address of the function entry point from the function
3618       //      descriptor.
3619       //   3. Load the TOC of the callee from the function descriptor into r2.
3620       //   4. Load the environment pointer from the function descriptor into
3621       //      r11.
3622       //   5. Branch to the function entry point address.
3623       //   6. On return of the callee, the TOC of the caller needs to be
3624       //      restored (this is done in FinishCall()).
3625       //
3626       // All those operations are flagged together to ensure that no other
3627       // operations can be scheduled in between. E.g. without flagging the
3628       // operations together, a TOC access in the caller could be scheduled
3629       // between the load of the callee TOC and the branch to the callee, which
3630       // results in the TOC access going through the TOC of the callee instead
3631       // of going through the TOC of the caller, which leads to incorrect code.
3632
3633       // Load the address of the function entry point from the function
3634       // descriptor.
3635       SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
3636       SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs,
3637                               makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
3638       Chain = LoadFuncPtr.getValue(1);
3639       InFlag = LoadFuncPtr.getValue(2);
3640
3641       // Load environment pointer into r11.
3642       // Offset of the environment pointer within the function descriptor.
3643       SDValue PtrOff = DAG.getIntPtrConstant(16);
3644
3645       SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
3646       SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr,
3647                                        InFlag);
3648       Chain = LoadEnvPtr.getValue(1);
3649       InFlag = LoadEnvPtr.getValue(2);
3650
3651       SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
3652                                         InFlag);
3653       Chain = EnvVal.getValue(0);
3654       InFlag = EnvVal.getValue(1);
3655
3656       // Load TOC of the callee into r2. We are using a target-specific load
3657       // with r2 hard coded, because the result of a target-independent load
3658       // would never go directly into r2, since r2 is a reserved register (which
3659       // prevents the register allocator from allocating it), resulting in an
3660       // additional register being allocated and an unnecessary move instruction
3661       // being generated.
3662       VTs = DAG.getVTList(MVT::Other, MVT::Glue);
3663       SDValue TOCOff = DAG.getIntPtrConstant(8);
3664       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
3665       SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
3666                                        AddTOC, InFlag);
3667       Chain = LoadTOCPtr.getValue(0);
3668       InFlag = LoadTOCPtr.getValue(1);
3669
3670       MTCTROps[0] = Chain;
3671       MTCTROps[1] = LoadFuncPtr;
3672       MTCTROps[2] = InFlag;
3673     }
3674
3675     Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
3676                         makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
3677     InFlag = Chain.getValue(1);
3678
3679     NodeTys.clear();
3680     NodeTys.push_back(MVT::Other);
3681     NodeTys.push_back(MVT::Glue);
3682     Ops.push_back(Chain);
3683     CallOpc = PPCISD::BCTRL;
3684     Callee.setNode(nullptr);
3685     // Add use of X11 (holding environment pointer)
3686     if (isSVR4ABI && isPPC64 && !isELFv2ABI)
3687       Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
3688     // Add CTR register as callee so a bctr can be emitted later.
3689     if (isTailCall)
3690       Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
3691   }
3692
3693   // If this is a direct call, pass the chain and the callee.
3694   if (Callee.getNode()) {
3695     Ops.push_back(Chain);
3696     Ops.push_back(Callee);
3697   }
3698   // If this is a tail call add stack pointer delta.
3699   if (isTailCall)
3700     Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
3701
3702   // Add argument registers to the end of the list so that they are known live
3703   // into the call.
3704   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
3705     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
3706                                   RegsToPass[i].second.getValueType()));
3707
3708   // Direct calls in the ELFv2 ABI need the TOC register live into the call.
3709   if (Callee.getNode() && isELFv2ABI)
3710     Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
3711
3712   return CallOpc;
3713 }
3714
3715 static
3716 bool isLocalCall(const SDValue &Callee)
3717 {
3718   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
3719     return !G->getGlobal()->isDeclaration() &&
3720            !G->getGlobal()->isWeakForLinker();
3721   return false;
3722 }
3723
3724 SDValue
3725 PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
3726                                    CallingConv::ID CallConv, bool isVarArg,
3727                                    const SmallVectorImpl<ISD::InputArg> &Ins,
3728                                    SDLoc dl, SelectionDAG &DAG,
3729                                    SmallVectorImpl<SDValue> &InVals) const {
3730
3731   SmallVector<CCValAssign, 16> RVLocs;
3732   CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3733                     getTargetMachine(), RVLocs, *DAG.getContext());
3734   CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
3735
3736   // Copy all of the result registers out of their specified physreg.
3737   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
3738     CCValAssign &VA = RVLocs[i];
3739     assert(VA.isRegLoc() && "Can only return in registers!");
3740
3741     SDValue Val = DAG.getCopyFromReg(Chain, dl,
3742                                      VA.getLocReg(), VA.getLocVT(), InFlag);
3743     Chain = Val.getValue(1);
3744     InFlag = Val.getValue(2);
3745
3746     switch (VA.getLocInfo()) {
3747     default: llvm_unreachable("Unknown loc info!");
3748     case CCValAssign::Full: break;
3749     case CCValAssign::AExt:
3750       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3751       break;
3752     case CCValAssign::ZExt:
3753       Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
3754                         DAG.getValueType(VA.getValVT()));
3755       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3756       break;
3757     case CCValAssign::SExt:
3758       Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
3759                         DAG.getValueType(VA.getValVT()));
3760       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3761       break;
3762     }
3763
3764     InVals.push_back(Val);
3765   }
3766
3767   return Chain;
3768 }
3769
3770 SDValue
3771 PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
3772                               bool isTailCall, bool isVarArg,
3773                               SelectionDAG &DAG,
3774                               SmallVector<std::pair<unsigned, SDValue>, 8>
3775                                 &RegsToPass,
3776                               SDValue InFlag, SDValue Chain,
3777                               SDValue &Callee,
3778                               int SPDiff, unsigned NumBytes,
3779                               const SmallVectorImpl<ISD::InputArg> &Ins,
3780                               SmallVectorImpl<SDValue> &InVals) const {
3781
3782   bool isELFv2ABI = Subtarget.isELFv2ABI();
3783   std::vector<EVT> NodeTys;
3784   SmallVector<SDValue, 8> Ops;
3785   unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
3786                                  isTailCall, RegsToPass, Ops, NodeTys,
3787                                  Subtarget);
3788
3789   // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
3790   if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
3791     Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
3792
3793   // When performing tail call optimization the callee pops its arguments off
3794   // the stack. Account for this here so these bytes can be pushed back on in
3795   // PPCFrameLowering::eliminateCallFramePseudoInstr.
3796   int BytesCalleePops =
3797     (CallConv == CallingConv::Fast &&
3798      getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
3799
3800   // Add a register mask operand representing the call-preserved registers.
3801   const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
3802   const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
3803   assert(Mask && "Missing call preserved mask for calling convention");
3804   Ops.push_back(DAG.getRegisterMask(Mask));
3805
3806   if (InFlag.getNode())
3807     Ops.push_back(InFlag);
3808
3809   // Emit tail call.
3810   if (isTailCall) {
3811     assert(((Callee.getOpcode() == ISD::Register &&
3812              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
3813             Callee.getOpcode() == ISD::TargetExternalSymbol ||
3814             Callee.getOpcode() == ISD::TargetGlobalAddress ||
3815             isa<ConstantSDNode>(Callee)) &&
3816     "Expecting an global address, external symbol, absolute value or register");
3817
3818     return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
3819   }
3820
3821   // Add a NOP immediately after the branch instruction when using the 64-bit
3822   // SVR4 ABI. At link time, if caller and callee are in a different module and
3823   // thus have a different TOC, the call will be replaced with a call to a stub
3824   // function which saves the current TOC, loads the TOC of the callee and
3825   // branches to the callee. The NOP will be replaced with a load instruction
3826   // which restores the TOC of the caller from the TOC save slot of the current
3827   // stack frame. If caller and callee belong to the same module (and have the
3828   // same TOC), the NOP will remain unchanged.
3829
3830   bool needsTOCRestore = false;
3831   if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64()) {
3832     if (CallOpc == PPCISD::BCTRL) {
3833       // This is a call through a function pointer.
3834       // Restore the caller TOC from the save area into R2.
3835       // See PrepareCall() for more information about calls through function
3836       // pointers in the 64-bit SVR4 ABI.
3837       // We are using a target-specific load with r2 hard coded, because the
3838       // result of a target-independent load would never go directly into r2,
3839       // since r2 is a reserved register (which prevents the register allocator
3840       // from allocating it), resulting in an additional register being
3841       // allocated and an unnecessary move instruction being generated.
3842       needsTOCRestore = true;
3843     } else if ((CallOpc == PPCISD::CALL) &&
3844                (!isLocalCall(Callee) ||
3845                 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
3846       // Otherwise insert NOP for non-local calls.
3847       CallOpc = PPCISD::CALL_NOP;
3848     }
3849   }
3850
3851   Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
3852   InFlag = Chain.getValue(1);
3853
3854   if (needsTOCRestore) {
3855     SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
3856     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3857     SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
3858     unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
3859     SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
3860     SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
3861     Chain = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, AddTOC, InFlag);
3862     InFlag = Chain.getValue(1);
3863   }
3864
3865   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
3866                              DAG.getIntPtrConstant(BytesCalleePops, true),
3867                              InFlag, dl);
3868   if (!Ins.empty())
3869     InFlag = Chain.getValue(1);
3870
3871   return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
3872                          Ins, dl, DAG, InVals);
3873 }
3874
3875 SDValue
3876 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3877                              SmallVectorImpl<SDValue> &InVals) const {
3878   SelectionDAG &DAG                     = CLI.DAG;
3879   SDLoc &dl                             = CLI.DL;
3880   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3881   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
3882   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
3883   SDValue Chain                         = CLI.Chain;
3884   SDValue Callee                        = CLI.Callee;
3885   bool &isTailCall                      = CLI.IsTailCall;
3886   CallingConv::ID CallConv              = CLI.CallConv;
3887   bool isVarArg                         = CLI.IsVarArg;
3888
3889   if (isTailCall)
3890     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
3891                                                    Ins, DAG);
3892
3893   if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
3894     report_fatal_error("failed to perform tail call elimination on a call "
3895                        "site marked musttail");
3896
3897   if (Subtarget.isSVR4ABI()) {
3898     if (Subtarget.isPPC64())
3899       return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
3900                               isTailCall, Outs, OutVals, Ins,
3901                               dl, DAG, InVals);
3902     else
3903       return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
3904                               isTailCall, Outs, OutVals, Ins,
3905                               dl, DAG, InVals);
3906   }
3907
3908   return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
3909                           isTailCall, Outs, OutVals, Ins,
3910                           dl, DAG, InVals);
3911 }
3912
3913 SDValue
3914 PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
3915                                     CallingConv::ID CallConv, bool isVarArg,
3916                                     bool isTailCall,
3917                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
3918                                     const SmallVectorImpl<SDValue> &OutVals,
3919                                     const SmallVectorImpl<ISD::InputArg> &Ins,
3920                                     SDLoc dl, SelectionDAG &DAG,
3921                                     SmallVectorImpl<SDValue> &InVals) const {
3922   // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
3923   // of the 32-bit SVR4 ABI stack frame layout.
3924
3925   assert((CallConv == CallingConv::C ||
3926           CallConv == CallingConv::Fast) && "Unknown calling convention!");
3927
3928   unsigned PtrByteSize = 4;
3929
3930   MachineFunction &MF = DAG.getMachineFunction();
3931
3932   // Mark this function as potentially containing a function that contains a
3933   // tail call. As a consequence the frame pointer will be used for dynamicalloc
3934   // and restoring the callers stack pointer in this functions epilog. This is
3935   // done because by tail calling the called function might overwrite the value
3936   // in this function's (MF) stack pointer stack slot 0(SP).
3937   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
3938       CallConv == CallingConv::Fast)
3939     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
3940
3941   // Count how many bytes are to be pushed on the stack, including the linkage
3942   // area, parameter list area and the part of the local variable space which
3943   // contains copies of aggregates which are passed by value.
3944
3945   // Assign locations to all of the outgoing arguments.
3946   SmallVector<CCValAssign, 16> ArgLocs;
3947   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3948                  getTargetMachine(), ArgLocs, *DAG.getContext());
3949
3950   // Reserve space for the linkage area on the stack.
3951   CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false, false),
3952                        PtrByteSize);
3953
3954   if (isVarArg) {
3955     // Handle fixed and variable vector arguments differently.
3956     // Fixed vector arguments go into registers as long as registers are
3957     // available. Variable vector arguments always go into memory.
3958     unsigned NumArgs = Outs.size();
3959
3960     for (unsigned i = 0; i != NumArgs; ++i) {
3961       MVT ArgVT = Outs[i].VT;
3962       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3963       bool Result;
3964
3965       if (Outs[i].IsFixed) {
3966         Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
3967                                CCInfo);
3968       } else {
3969         Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
3970                                       ArgFlags, CCInfo);
3971       }
3972
3973       if (Result) {
3974 #ifndef NDEBUG
3975         errs() << "Call operand #" << i << " has unhandled type "
3976              << EVT(ArgVT).getEVTString() << "\n";
3977 #endif
3978         llvm_unreachable(nullptr);
3979       }
3980     }
3981   } else {
3982     // All arguments are treated the same.
3983     CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
3984   }
3985
3986   // Assign locations to all of the outgoing aggregate by value arguments.
3987   SmallVector<CCValAssign, 16> ByValArgLocs;
3988   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3989                       getTargetMachine(), ByValArgLocs, *DAG.getContext());
3990
3991   // Reserve stack space for the allocations in CCInfo.
3992   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
3993
3994   CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
3995
3996   // Size of the linkage area, parameter list area and the part of the local
3997   // space variable where copies of aggregates which are passed by value are
3998   // stored.
3999   unsigned NumBytes = CCByValInfo.getNextStackOffset();
4000
4001   // Calculate by how many bytes the stack has to be adjusted in case of tail
4002   // call optimization.
4003   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
4004
4005   // Adjust the stack pointer for the new arguments...
4006   // These operations are automatically eliminated by the prolog/epilog pass
4007   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
4008                                dl);
4009   SDValue CallSeqStart = Chain;
4010
4011   // Load the return address and frame pointer so it can be moved somewhere else
4012   // later.
4013   SDValue LROp, FPOp;
4014   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
4015                                        dl);
4016
4017   // Set up a copy of the stack pointer for use loading and storing any
4018   // arguments that may not fit in the registers available for argument
4019   // passing.
4020   SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4021
4022   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
4023   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
4024   SmallVector<SDValue, 8> MemOpChains;
4025
4026   bool seenFloatArg = false;
4027   // Walk the register/memloc assignments, inserting copies/loads.
4028   for (unsigned i = 0, j = 0, e = ArgLocs.size();
4029        i != e;
4030        ++i) {
4031     CCValAssign &VA = ArgLocs[i];
4032     SDValue Arg = OutVals[i];
4033     ISD::ArgFlagsTy Flags = Outs[i].Flags;
4034
4035     if (Flags.isByVal()) {
4036       // Argument is an aggregate which is passed by value, thus we need to
4037       // create a copy of it in the local variable space of the current stack
4038       // frame (which is the stack frame of the caller) and pass the address of
4039       // this copy to the callee.
4040       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
4041       CCValAssign &ByValVA = ByValArgLocs[j++];
4042       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
4043
4044       // Memory reserved in the local variable space of the callers stack frame.
4045       unsigned LocMemOffset = ByValVA.getLocMemOffset();
4046
4047       SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
4048       PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
4049
4050       // Create a copy of the argument in the local area of the current
4051       // stack frame.
4052       SDValue MemcpyCall =
4053         CreateCopyOfByValArgument(Arg, PtrOff,
4054                                   CallSeqStart.getNode()->getOperand(0),
4055                                   Flags, DAG, dl);
4056
4057       // This must go outside the CALLSEQ_START..END.
4058       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
4059                            CallSeqStart.getNode()->getOperand(1),
4060                            SDLoc(MemcpyCall));
4061       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
4062                              NewCallSeqStart.getNode());
4063       Chain = CallSeqStart = NewCallSeqStart;
4064
4065       // Pass the address of the aggregate copy on the stack either in a
4066       // physical register or in the parameter list area of the current stack
4067       // frame to the callee.
4068       Arg = PtrOff;
4069     }
4070
4071     if (VA.isRegLoc()) {
4072       if (Arg.getValueType() == MVT::i1)
4073         Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
4074
4075       seenFloatArg |= VA.getLocVT().isFloatingPoint();
4076       // Put argument in a physical register.
4077       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
4078     } else {
4079       // Put argument in the parameter list area of the current stack frame.
4080       assert(VA.isMemLoc());
4081       unsigned LocMemOffset = VA.getLocMemOffset();
4082
4083       if (!isTailCall) {
4084         SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
4085         PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
4086
4087         MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
4088                                            MachinePointerInfo(),
4089                                            false, false, 0));
4090       } else {
4091         // Calculate and remember argument location.
4092         CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
4093                                  TailCallArguments);
4094       }
4095     }
4096   }
4097
4098   if (!MemOpChains.empty())
4099     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4100
4101   // Build a sequence of copy-to-reg nodes chained together with token chain
4102   // and flag operands which copy the outgoing args into the appropriate regs.
4103   SDValue InFlag;
4104   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4105     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4106                              RegsToPass[i].second, InFlag);
4107     InFlag = Chain.getValue(1);
4108   }
4109
4110   // Set CR bit 6 to true if this is a vararg call with floating args passed in
4111   // registers.
4112   if (isVarArg) {
4113     SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
4114     SDValue Ops[] = { Chain, InFlag };
4115
4116     Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
4117                         dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
4118
4119     InFlag = Chain.getValue(1);
4120   }
4121
4122   if (isTailCall)
4123     PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
4124                     false, TailCallArguments);
4125
4126   return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
4127                     RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
4128                     Ins, InVals);
4129 }
4130
4131 // Copy an argument into memory, being careful to do this outside the
4132 // call sequence for the call to which the argument belongs.
4133 SDValue
4134 PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
4135                                               SDValue CallSeqStart,
4136                                               ISD::ArgFlagsTy Flags,
4137                                               SelectionDAG &DAG,
4138                                               SDLoc dl) const {
4139   SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
4140                         CallSeqStart.getNode()->getOperand(0),
4141                         Flags, DAG, dl);
4142   // The MEMCPY must go outside the CALLSEQ_START..END.
4143   SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
4144                              CallSeqStart.getNode()->getOperand(1),
4145                              SDLoc(MemcpyCall));
4146   DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
4147                          NewCallSeqStart.getNode());
4148   return NewCallSeqStart;
4149 }
4150
4151 SDValue
4152 PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
4153                                     CallingConv::ID CallConv, bool isVarArg,
4154                                     bool isTailCall,
4155                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
4156                                     const SmallVectorImpl<SDValue> &OutVals,
4157                                     const SmallVectorImpl<ISD::InputArg> &Ins,
4158                                     SDLoc dl, SelectionDAG &DAG,
4159                                     SmallVectorImpl<SDValue> &InVals) const {
4160
4161   bool isELFv2ABI = Subtarget.isELFv2ABI();
4162   bool isLittleEndian = Subtarget.isLittleEndian();
4163   unsigned NumOps = Outs.size();
4164
4165   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4166   unsigned PtrByteSize = 8;
4167
4168   MachineFunction &MF = DAG.getMachineFunction();
4169
4170   // Mark this function as potentially containing a function that contains a
4171   // tail call. As a consequence the frame pointer will be used for dynamicalloc
4172   // and restoring the callers stack pointer in this functions epilog. This is
4173   // done because by tail calling the called function might overwrite the value
4174   // in this function's (MF) stack pointer stack slot 0(SP).
4175   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4176       CallConv == CallingConv::Fast)
4177     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
4178
4179   // Count how many bytes are to be pushed on the stack, including the linkage
4180   // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
4181   // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
4182   // area is 32 bytes reserved space for [SP][CR][LR][TOC].
4183   unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
4184                                                           isELFv2ABI);
4185   unsigned NumBytes = LinkageSize;
4186
4187   // Add up all the space actually used.
4188   for (unsigned i = 0; i != NumOps; ++i) {
4189     ISD::ArgFlagsTy Flags = Outs[i].Flags;
4190     EVT ArgVT = Outs[i].VT;
4191     EVT OrigVT = Outs[i].ArgVT;
4192
4193     /* Respect alignment of argument on the stack.  */
4194     unsigned Align =
4195       CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4196     NumBytes = ((NumBytes + Align - 1) / Align) * Align;
4197
4198     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4199     if (Flags.isInConsecutiveRegsLast())
4200       NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4201   }
4202
4203   unsigned NumBytesActuallyUsed = NumBytes;
4204
4205   // The prolog code of the callee may store up to 8 GPR argument registers to
4206   // the stack, allowing va_start to index over them in memory if its varargs.
4207   // Because we cannot tell if this is needed on the caller side, we have to
4208   // conservatively assume that it is needed.  As such, make sure we have at
4209   // least enough stack space for the caller to store the 8 GPRs.
4210   // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
4211   NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
4212
4213   // Tail call needs the stack to be aligned.
4214   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4215       CallConv == CallingConv::Fast)
4216     NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
4217
4218   // Calculate by how many bytes the stack has to be adjusted in case of tail
4219   // call optimization.
4220   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
4221
4222   // To protect arguments on the stack from being clobbered in a tail call,
4223   // force all the loads to happen before doing any other lowering.
4224   if (isTailCall)
4225     Chain = DAG.getStackArgumentTokenFactor(Chain);
4226
4227   // Adjust the stack pointer for the new arguments...
4228   // These operations are automatically eliminated by the prolog/epilog pass
4229   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
4230                                dl);
4231   SDValue CallSeqStart = Chain;
4232
4233   // Load the return address and frame pointer so it can be move somewhere else
4234   // later.
4235   SDValue LROp, FPOp;
4236   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
4237                                        dl);
4238
4239   // Set up a copy of the stack pointer for use loading and storing any
4240   // arguments that may not fit in the registers available for argument
4241   // passing.
4242   SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4243
4244   // Figure out which arguments are going to go in registers, and which in
4245   // memory.  Also, if this is a vararg function, floating point operations
4246   // must be stored to our stack, and loaded into integer regs as well, if
4247   // any integer regs are available for argument passing.
4248   unsigned ArgOffset = LinkageSize;
4249   unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
4250
4251   static const MCPhysReg GPR[] = {
4252     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4253     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4254   };
4255   static const MCPhysReg *FPR = GetFPR();
4256
4257   static const MCPhysReg VR[] = {
4258     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4259     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4260   };
4261   static const MCPhysReg VSRH[] = {
4262     PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
4263     PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
4264   };
4265
4266   const unsigned NumGPRs = array_lengthof(GPR);
4267   const unsigned NumFPRs = 13;
4268   const unsigned NumVRs  = array_lengthof(VR);
4269
4270   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
4271   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
4272
4273   SmallVector<SDValue, 8> MemOpChains;
4274   for (unsigned i = 0; i != NumOps; ++i) {
4275     SDValue Arg = OutVals[i];
4276     ISD::ArgFlagsTy Flags = Outs[i].Flags;
4277     EVT ArgVT = Outs[i].VT;
4278     EVT OrigVT = Outs[i].ArgVT;
4279
4280     /* Respect alignment of argument on the stack.  */
4281     unsigned Align =
4282       CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4283     ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
4284
4285     /* Compute GPR index associated with argument offset.  */
4286     GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4287     GPR_idx = std::min(GPR_idx, NumGPRs);
4288
4289     // PtrOff will be used to store the current argument to the stack if a
4290     // register cannot be found for it.
4291     SDValue PtrOff;
4292
4293     PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
4294
4295     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
4296
4297     // Promote integers to 64-bit values.
4298     if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
4299       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
4300       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
4301       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
4302     }
4303
4304     // FIXME memcpy is used way more than necessary.  Correctness first.
4305     // Note: "by value" is code for passing a structure by value, not
4306     // basic types.
4307     if (Flags.isByVal()) {
4308       // Note: Size includes alignment padding, so
4309       //   struct x { short a; char b; }
4310       // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
4311       // These are the proper values we need for right-justifying the
4312       // aggregate in a parameter register.
4313       unsigned Size = Flags.getByValSize();
4314
4315       // An empty aggregate parameter takes up no storage and no
4316       // registers.
4317       if (Size == 0)
4318         continue;
4319
4320       // All aggregates smaller than 8 bytes must be passed right-justified.
4321       if (Size==1 || Size==2 || Size==4) {
4322         EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
4323         if (GPR_idx != NumGPRs) {
4324           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
4325                                         MachinePointerInfo(), VT,
4326                                         false, false, false, 0);
4327           MemOpChains.push_back(Load.getValue(1));
4328           RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
4329
4330           ArgOffset += PtrByteSize;
4331           continue;
4332         }
4333       }
4334
4335       if (GPR_idx == NumGPRs && Size < 8) {
4336         SDValue AddPtr = PtrOff;
4337         if (!isLittleEndian) {
4338           SDValue Const = DAG.getConstant(PtrByteSize - Size,
4339                                           PtrOff.getValueType());
4340           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
4341         }
4342         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
4343                                                           CallSeqStart,
4344                                                           Flags, DAG, dl);
4345         ArgOffset += PtrByteSize;
4346         continue;
4347       }
4348       // Copy entire object into memory.  There are cases where gcc-generated
4349       // code assumes it is there, even if it could be put entirely into
4350       // registers.  (This is not what the doc says.)
4351
4352       // FIXME: The above statement is likely due to a misunderstanding of the
4353       // documents.  All arguments must be copied into the parameter area BY
4354       // THE CALLEE in the event that the callee takes the address of any
4355       // formal argument.  That has not yet been implemented.  However, it is
4356       // reasonable to use the stack area as a staging area for the register
4357       // load.
4358
4359       // Skip this for small aggregates, as we will use the same slot for a
4360       // right-justified copy, below.
4361       if (Size >= 8)
4362         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
4363                                                           CallSeqStart,
4364                                                           Flags, DAG, dl);
4365
4366       // When a register is available, pass a small aggregate right-justified.
4367       if (Size < 8 && GPR_idx != NumGPRs) {
4368         // The easiest way to get this right-justified in a register
4369         // is to copy the structure into the rightmost portion of a
4370         // local variable slot, then load the whole slot into the
4371         // register.
4372         // FIXME: The memcpy seems to produce pretty awful code for
4373         // small aggregates, particularly for packed ones.
4374         // FIXME: It would be preferable to use the slot in the
4375         // parameter save area instead of a new local variable.
4376         SDValue AddPtr = PtrOff;
4377         if (!isLittleEndian) {
4378           SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
4379           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
4380         }
4381         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
4382                                                           CallSeqStart,
4383                                                           Flags, DAG, dl);
4384
4385         // Load the slot into the register.
4386         SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
4387                                    MachinePointerInfo(),
4388                                    false, false, false, 0);
4389         MemOpChains.push_back(Load.getValue(1));
4390         RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
4391
4392         // Done with this argument.
4393         ArgOffset += PtrByteSize;
4394         continue;
4395       }
4396
4397       // For aggregates larger than PtrByteSize, copy the pieces of the
4398       // object that fit into registers from the parameter save area.
4399       for (unsigned j=0; j<Size; j+=PtrByteSize) {
4400         SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
4401         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
4402         if (GPR_idx != NumGPRs) {
4403           SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
4404                                      MachinePointerInfo(),
4405                                      false, false, false, 0);
4406           MemOpChains.push_back(Load.getValue(1));
4407           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4408           ArgOffset += PtrByteSize;
4409         } else {
4410           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
4411           break;
4412         }
4413       }
4414       continue;
4415     }
4416
4417     switch (Arg.getSimpleValueType().SimpleTy) {
4418     default: llvm_unreachable("Unexpected ValueType for argument!");
4419     case MVT::i1:
4420     case MVT::i32:
4421     case MVT::i64:
4422       // These can be scalar arguments or elements of an integer array type
4423       // passed directly.  Clang may use those instead of "byval" aggregate
4424       // types to avoid forcing arguments to memory unnecessarily.
4425       if (GPR_idx != NumGPRs) {
4426         RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg));
4427       } else {
4428         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4429                          true, isTailCall, false, MemOpChains,
4430                          TailCallArguments, dl);
4431       }
4432       ArgOffset += PtrByteSize;
4433       break;
4434     case MVT::f32:
4435     case MVT::f64: {
4436       // These can be scalar arguments or elements of a float array type
4437       // passed directly.  The latter are used to implement ELFv2 homogenous
4438       // float aggregates.
4439
4440       // Named arguments go into FPRs first, and once they overflow, the
4441       // remaining arguments go into GPRs and then the parameter save area.
4442       // Unnamed arguments for vararg functions always go to GPRs and
4443       // then the parameter save area.  For now, put all arguments to vararg
4444       // routines always in both locations (FPR *and* GPR or stack slot).
4445       bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
4446
4447       // First load the argument into the next available FPR.
4448       if (FPR_idx != NumFPRs)
4449         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
4450
4451       // Next, load the argument into GPR or stack slot if needed.
4452       if (!NeedGPROrStack)
4453         ;
4454       else if (GPR_idx != NumGPRs) {
4455         // In the non-vararg case, this can only ever happen in the
4456         // presence of f32 array types, since otherwise we never run
4457         // out of FPRs before running out of GPRs.
4458         SDValue ArgVal;
4459
4460         // Double values are always passed in a single GPR.
4461         if (Arg.getValueType() != MVT::f32) {
4462           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
4463
4464         // Non-array float values are extended and passed in a GPR.
4465         } else if (!Flags.isInConsecutiveRegs()) {
4466           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
4467           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
4468
4469         // If we have an array of floats, we collect every odd element
4470         // together with its predecessor into one GPR.
4471         } else if (ArgOffset % PtrByteSize != 0) {
4472           SDValue Lo, Hi;
4473           Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
4474           Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
4475           if (!isLittleEndian)
4476             std::swap(Lo, Hi);
4477           ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
4478
4479         // The final element, if even, goes into the first half of a GPR.
4480         } else if (Flags.isInConsecutiveRegsLast()) {
4481           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
4482           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
4483           if (!isLittleEndian)
4484             ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
4485                                  DAG.getConstant(32, MVT::i32));
4486
4487         // Non-final even elements are skipped; they will be handled
4488         // together the with subsequent argument on the next go-around.
4489         } else
4490           ArgVal = SDValue();
4491
4492         if (ArgVal.getNode())
4493           RegsToPass.push_back(std::make_pair(GPR[GPR_idx], ArgVal));
4494       } else {
4495         // Single-precision floating-point values are mapped to the
4496         // second (rightmost) word of the stack doubleword.
4497         if (Arg.getValueType() == MVT::f32 &&
4498             !isLittleEndian && !Flags.isInConsecutiveRegs()) {
4499           SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
4500           PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
4501         }
4502
4503         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4504                          true, isTailCall, false, MemOpChains,
4505                          TailCallArguments, dl);
4506       }
4507       // When passing an array of floats, the array occupies consecutive
4508       // space in the argument area; only round up to the next doubleword
4509       // at the end of the array.  Otherwise, each float takes 8 bytes.
4510       ArgOffset += (Arg.getValueType() == MVT::f32 &&
4511                     Flags.isInConsecutiveRegs()) ? 4 : 8;
4512       if (Flags.isInConsecutiveRegsLast())
4513         ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4514       break;
4515     }
4516     case MVT::v4f32:
4517     case MVT::v4i32:
4518     case MVT::v8i16:
4519     case MVT::v16i8:
4520     case MVT::v2f64:
4521     case MVT::v2i64:
4522       // These can be scalar arguments or elements of a vector array type
4523       // passed directly.  The latter are used to implement ELFv2 homogenous
4524       // vector aggregates.
4525
4526       // For a varargs call, named arguments go into VRs or on the stack as
4527       // usual; unnamed arguments always go to the stack or the corresponding
4528       // GPRs when within range.  For now, we always put the value in both
4529       // locations (or even all three).
4530       if (isVarArg) {
4531         // We could elide this store in the case where the object fits
4532         // entirely in R registers.  Maybe later.
4533         SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
4534                                      MachinePointerInfo(), false, false, 0);
4535         MemOpChains.push_back(Store);
4536         if (VR_idx != NumVRs) {
4537           SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
4538                                      MachinePointerInfo(),
4539                                      false, false, false, 0);
4540           MemOpChains.push_back(Load.getValue(1));
4541
4542           unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
4543                            Arg.getSimpleValueType() == MVT::v2i64) ?
4544                           VSRH[VR_idx] : VR[VR_idx];
4545           ++VR_idx;
4546
4547           RegsToPass.push_back(std::make_pair(VReg, Load));
4548         }
4549         ArgOffset += 16;
4550         for (unsigned i=0; i<16; i+=PtrByteSize) {
4551           if (GPR_idx == NumGPRs)
4552             break;
4553           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
4554                                   DAG.getConstant(i, PtrVT));
4555           SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
4556                                      false, false, false, 0);
4557           MemOpChains.push_back(Load.getValue(1));
4558           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4559         }
4560         break;
4561       }
4562
4563       // Non-varargs Altivec params go into VRs or on the stack.
4564       if (VR_idx != NumVRs) {
4565         unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
4566                          Arg.getSimpleValueType() == MVT::v2i64) ?
4567                         VSRH[VR_idx] : VR[VR_idx];
4568         ++VR_idx;
4569
4570         RegsToPass.push_back(std::make_pair(VReg, Arg));
4571       } else {
4572         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4573                          true, isTailCall, true, MemOpChains,
4574                          TailCallArguments, dl);
4575       }
4576       ArgOffset += 16;
4577       break;
4578     }
4579   }
4580
4581   assert(NumBytesActuallyUsed == ArgOffset);
4582   (void)NumBytesActuallyUsed;
4583
4584   if (!MemOpChains.empty())
4585     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4586
4587   // Check if this is an indirect call (MTCTR/BCTRL).
4588   // See PrepareCall() for more information about calls through function
4589   // pointers in the 64-bit SVR4 ABI.
4590   if (!isTailCall &&
4591       !dyn_cast<GlobalAddressSDNode>(Callee) &&
4592       !dyn_cast<ExternalSymbolSDNode>(Callee)) {
4593     // Load r2 into a virtual register and store it to the TOC save area.
4594     SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
4595     // TOC save area offset.
4596     unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
4597     SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset);
4598     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
4599     Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
4600                          false, false, 0);
4601     // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
4602     // This does not mean the MTCTR instruction must use R12; it's easier
4603     // to model this as an extra parameter, so do that.
4604     if (isELFv2ABI)
4605       RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
4606   }
4607
4608   // Build a sequence of copy-to-reg nodes chained together with token chain
4609   // and flag operands which copy the outgoing args into the appropriate regs.
4610   SDValue InFlag;
4611   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4612     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4613                              RegsToPass[i].second, InFlag);
4614     InFlag = Chain.getValue(1);
4615   }
4616
4617   if (isTailCall)
4618     PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
4619                     FPOp, true, TailCallArguments);
4620
4621   return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
4622                     RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
4623                     Ins, InVals);
4624 }
4625
4626 SDValue
4627 PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
4628                                     CallingConv::ID CallConv, bool isVarArg,
4629                                     bool isTailCall,
4630                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
4631                                     const SmallVectorImpl<SDValue> &OutVals,
4632                                     const SmallVectorImpl<ISD::InputArg> &Ins,
4633                                     SDLoc dl, SelectionDAG &DAG,
4634                                     SmallVectorImpl<SDValue> &InVals) const {
4635
4636   unsigned NumOps = Outs.size();
4637
4638   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4639   bool isPPC64 = PtrVT == MVT::i64;
4640   unsigned PtrByteSize = isPPC64 ? 8 : 4;
4641
4642   MachineFunction &MF = DAG.getMachineFunction();
4643
4644   // Mark this function as potentially containing a function that contains a
4645   // tail call. As a consequence the frame pointer will be used for dynamicalloc
4646   // and restoring the callers stack pointer in this functions epilog. This is
4647   // done because by tail calling the called function might overwrite the value
4648   // in this function's (MF) stack pointer stack slot 0(SP).
4649   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4650       CallConv == CallingConv::Fast)
4651     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
4652
4653   // Count how many bytes are to be pushed on the stack, including the linkage
4654   // area, and parameter passing area.  We start with 24/48 bytes, which is
4655   // prereserved space for [SP][CR][LR][3 x unused].
4656   unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
4657                                                           false);
4658   unsigned NumBytes = LinkageSize;
4659
4660   // Add up all the space actually used.
4661   // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
4662   // they all go in registers, but we must reserve stack space for them for
4663   // possible use by the caller.  In varargs or 64-bit calls, parameters are
4664   // assigned stack space in order, with padding so Altivec parameters are
4665   // 16-byte aligned.
4666   unsigned nAltivecParamsAtEnd = 0;
4667   for (unsigned i = 0; i != NumOps; ++i) {
4668     ISD::ArgFlagsTy Flags = Outs[i].Flags;
4669     EVT ArgVT = Outs[i].VT;
4670     // Varargs Altivec parameters are padded to a 16 byte boundary.
4671     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4672         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4673         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
4674       if (!isVarArg && !isPPC64) {
4675         // Non-varargs Altivec parameters go after all the non-Altivec
4676         // parameters; handle those later so we know how much padding we need.
4677         nAltivecParamsAtEnd++;
4678         continue;
4679       }
4680       // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
4681       NumBytes = ((NumBytes+15)/16)*16;
4682     }
4683     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4684   }
4685
4686   // Allow for Altivec parameters at the end, if needed.
4687   if (nAltivecParamsAtEnd) {
4688     NumBytes = ((NumBytes+15)/16)*16;
4689     NumBytes += 16*nAltivecParamsAtEnd;
4690   }
4691
4692   // The prolog code of the callee may store up to 8 GPR argument registers to
4693   // the stack, allowing va_start to index over them in memory if its varargs.
4694   // Because we cannot tell if this is needed on the caller side, we have to
4695   // conservatively assume that it is needed.  As such, make sure we have at
4696   // least enough stack space for the caller to store the 8 GPRs.
4697   NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
4698
4699   // Tail call needs the stack to be aligned.
4700   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4701       CallConv == CallingConv::Fast)
4702     NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
4703
4704   // Calculate by how many bytes the stack has to be adjusted in case of tail
4705   // call optimization.
4706   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
4707
4708   // To protect arguments on the stack from being clobbered in a tail call,
4709   // force all the loads to happen before doing any other lowering.
4710   if (isTailCall)
4711     Chain = DAG.getStackArgumentTokenFactor(Chain);
4712
4713   // Adjust the stack pointer for the new arguments...
4714   // These operations are automatically eliminated by the prolog/epilog pass
4715   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
4716                                dl);
4717   SDValue CallSeqStart = Chain;
4718
4719   // Load the return address and frame pointer so it can be move somewhere else
4720   // later.
4721   SDValue LROp, FPOp;
4722   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
4723                                        dl);
4724
4725   // Set up a copy of the stack pointer for use loading and storing any
4726   // arguments that may not fit in the registers available for argument
4727   // passing.
4728   SDValue StackPtr;
4729   if (isPPC64)
4730     StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4731   else
4732     StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4733
4734   // Figure out which arguments are going to go in registers, and which in
4735   // memory.  Also, if this is a vararg function, floating point operations
4736   // must be stored to our stack, and loaded into integer regs as well, if
4737   // any integer regs are available for argument passing.
4738   unsigned ArgOffset = LinkageSize;
4739   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4740
4741   static const MCPhysReg GPR_32[] = {           // 32-bit registers.
4742     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4743     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4744   };
4745   static const MCPhysReg GPR_64[] = {           // 64-bit registers.
4746     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4747     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4748   };
4749   static const MCPhysReg *FPR = GetFPR();
4750
4751   static const MCPhysReg VR[] = {
4752     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4753     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4754   };
4755   const unsigned NumGPRs = array_lengthof(GPR_32);
4756   const unsigned NumFPRs = 13;
4757   const unsigned NumVRs  = array_lengthof(VR);
4758
4759   const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
4760
4761   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
4762   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
4763
4764   SmallVector<SDValue, 8> MemOpChains;
4765   for (unsigned i = 0; i != NumOps; ++i) {
4766     SDValue Arg = OutVals[i];
4767     ISD::ArgFlagsTy Flags = Outs[i].Flags;
4768
4769     // PtrOff will be used to store the current argument to the stack if a
4770     // register cannot be found for it.
4771     SDValue PtrOff;
4772
4773     PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
4774
4775     PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
4776
4777     // On PPC64, promote integers to 64-bit values.
4778     if (isPPC64 && Arg.getValueType() == MVT::i32) {
4779       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
4780       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
4781       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
4782     }
4783
4784     // FIXME memcpy is used way more than necessary.  Correctness first.
4785     // Note: "by value" is code for passing a structure by value, not
4786     // basic types.
4787     if (Flags.isByVal()) {
4788       unsigned Size = Flags.getByValSize();
4789       // Very small objects are passed right-justified.  Everything else is
4790       // passed left-justified.
4791       if (Size==1 || Size==2) {
4792         EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
4793         if (GPR_idx != NumGPRs) {
4794           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
4795                                         MachinePointerInfo(), VT,
4796                                         false, false, false, 0);
4797           MemOpChains.push_back(Load.getValue(1));
4798           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4799
4800           ArgOffset += PtrByteSize;
4801         } else {
4802           SDValue Const = DAG.getConstant(PtrByteSize - Size,
4803                                           PtrOff.getValueType());
4804           SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
4805           Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
4806                                                             CallSeqStart,
4807                                                             Flags, DAG, dl);
4808           ArgOffset += PtrByteSize;
4809         }
4810         continue;
4811       }
4812       // Copy entire object into memory.  There are cases where gcc-generated
4813       // code assumes it is there, even if it could be put entirely into
4814       // registers.  (This is not what the doc says.)
4815       Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
4816                                                         CallSeqStart,
4817                                                         Flags, DAG, dl);
4818
4819       // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
4820       // copy the pieces of the object that fit into registers from the
4821       // parameter save area.
4822       for (unsigned j=0; j<Size; j+=PtrByteSize) {
4823         SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
4824         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
4825         if (GPR_idx != NumGPRs) {
4826           SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
4827                                      MachinePointerInfo(),
4828                                      false, false, false, 0);
4829           MemOpChains.push_back(Load.getValue(1));
4830           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4831           ArgOffset += PtrByteSize;
4832         } else {
4833           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
4834           break;
4835         }
4836       }
4837       continue;
4838     }
4839
4840     switch (Arg.getSimpleValueType().SimpleTy) {
4841     default: llvm_unreachable("Unexpected ValueType for argument!");
4842     case MVT::i1:
4843     case MVT::i32:
4844     case MVT::i64:
4845       if (GPR_idx != NumGPRs) {
4846         if (Arg.getValueType() == MVT::i1)
4847           Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
4848
4849         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
4850       } else {
4851         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4852                          isPPC64, isTailCall, false, MemOpChains,
4853                          TailCallArguments, dl);
4854       }
4855       ArgOffset += PtrByteSize;
4856       break;
4857     case MVT::f32:
4858     case MVT::f64:
4859       if (FPR_idx != NumFPRs) {
4860         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
4861
4862         if (isVarArg) {
4863           SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
4864                                        MachinePointerInfo(), false, false, 0);
4865           MemOpChains.push_back(Store);
4866
4867           // Float varargs are always shadowed in available integer registers
4868           if (GPR_idx != NumGPRs) {
4869             SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
4870                                        MachinePointerInfo(), false, false,
4871                                        false, 0);
4872             MemOpChains.push_back(Load.getValue(1));
4873             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4874           }
4875           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
4876             SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
4877             PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
4878             SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
4879                                        MachinePointerInfo(),
4880                                        false, false, false, 0);
4881             MemOpChains.push_back(Load.getValue(1));
4882             RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4883           }
4884         } else {
4885           // If we have any FPRs remaining, we may also have GPRs remaining.
4886           // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
4887           // GPRs.
4888           if (GPR_idx != NumGPRs)
4889             ++GPR_idx;
4890           if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
4891               !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
4892             ++GPR_idx;
4893         }
4894       } else
4895         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4896                          isPPC64, isTailCall, false, MemOpChains,
4897                          TailCallArguments, dl);
4898       if (isPPC64)
4899         ArgOffset += 8;
4900       else
4901         ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
4902       break;
4903     case MVT::v4f32:
4904     case MVT::v4i32:
4905     case MVT::v8i16:
4906     case MVT::v16i8:
4907       if (isVarArg) {
4908         // These go aligned on the stack, or in the corresponding R registers
4909         // when within range.  The Darwin PPC ABI doc claims they also go in
4910         // V registers; in fact gcc does this only for arguments that are
4911         // prototyped, not for those that match the ...  We do it for all
4912         // arguments, seems to work.
4913         while (ArgOffset % 16 !=0) {
4914           ArgOffset += PtrByteSize;
4915           if (GPR_idx != NumGPRs)
4916             GPR_idx++;
4917         }
4918         // We could elide this store in the case where the object fits
4919         // entirely in R registers.  Maybe later.
4920         PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4921                             DAG.getConstant(ArgOffset, PtrVT));
4922         SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
4923                                      MachinePointerInfo(), false, false, 0);
4924         MemOpChains.push_back(Store);
4925         if (VR_idx != NumVRs) {
4926           SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
4927                                      MachinePointerInfo(),
4928                                      false, false, false, 0);
4929           MemOpChains.push_back(Load.getValue(1));
4930           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
4931         }
4932         ArgOffset += 16;
4933         for (unsigned i=0; i<16; i+=PtrByteSize) {
4934           if (GPR_idx == NumGPRs)
4935             break;
4936           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
4937                                   DAG.getConstant(i, PtrVT));
4938           SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
4939                                      false, false, false, 0);
4940           MemOpChains.push_back(Load.getValue(1));
4941           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4942         }
4943         break;
4944       }
4945
4946       // Non-varargs Altivec params generally go in registers, but have
4947       // stack space allocated at the end.
4948       if (VR_idx != NumVRs) {
4949         // Doesn't have GPR space allocated.
4950         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
4951       } else if (nAltivecParamsAtEnd==0) {
4952         // We are emitting Altivec params in order.
4953         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4954                          isPPC64, isTailCall, true, MemOpChains,
4955                          TailCallArguments, dl);
4956         ArgOffset += 16;
4957       }
4958       break;
4959     }
4960   }
4961   // If all Altivec parameters fit in registers, as they usually do,
4962   // they get stack space following the non-Altivec parameters.  We
4963   // don't track this here because nobody below needs it.
4964   // If there are more Altivec parameters than fit in registers emit
4965   // the stores here.
4966   if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
4967     unsigned j = 0;
4968     // Offset is aligned; skip 1st 12 params which go in V registers.
4969     ArgOffset = ((ArgOffset+15)/16)*16;
4970     ArgOffset += 12*16;
4971     for (unsigned i = 0; i != NumOps; ++i) {
4972       SDValue Arg = OutVals[i];
4973       EVT ArgType = Outs[i].VT;
4974       if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
4975           ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
4976         if (++j > NumVRs) {
4977           SDValue PtrOff;
4978           // We are emitting Altivec params in order.
4979           LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4980                            isPPC64, isTailCall, true, MemOpChains,
4981                            TailCallArguments, dl);
4982           ArgOffset += 16;
4983         }
4984       }
4985     }
4986   }
4987
4988   if (!MemOpChains.empty())
4989     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4990
4991   // On Darwin, R12 must contain the address of an indirect callee.  This does
4992   // not mean the MTCTR instruction must use R12; it's easier to model this as
4993   // an extra parameter, so do that.
4994   if (!isTailCall &&
4995       !dyn_cast<GlobalAddressSDNode>(Callee) &&
4996       !dyn_cast<ExternalSymbolSDNode>(Callee) &&
4997       !isBLACompatibleAddress(Callee, DAG))
4998     RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
4999                                                    PPC::R12), Callee));
5000
5001   // Build a sequence of copy-to-reg nodes chained together with token chain
5002   // and flag operands which copy the outgoing args into the appropriate regs.
5003   SDValue InFlag;
5004   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5005     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5006                              RegsToPass[i].second, InFlag);
5007     InFlag = Chain.getValue(1);
5008   }
5009
5010   if (isTailCall)
5011     PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
5012                     FPOp, true, TailCallArguments);
5013
5014   return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
5015                     RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
5016                     Ins, InVals);
5017 }
5018
5019 bool
5020 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
5021                                   MachineFunction &MF, bool isVarArg,
5022                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
5023                                   LLVMContext &Context) const {
5024   SmallVector<CCValAssign, 16> RVLocs;
5025   CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
5026                  RVLocs, Context);
5027   return CCInfo.CheckReturn(Outs, RetCC_PPC);
5028 }
5029
5030 SDValue
5031 PPCTargetLowering::LowerReturn(SDValue Chain,
5032                                CallingConv::ID CallConv, bool isVarArg,
5033                                const SmallVectorImpl<ISD::OutputArg> &Outs,
5034                                const SmallVectorImpl<SDValue> &OutVals,
5035                                SDLoc dl, SelectionDAG &DAG) const {
5036
5037   SmallVector<CCValAssign, 16> RVLocs;
5038   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
5039                  getTargetMachine(), RVLocs, *DAG.getContext());
5040   CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
5041
5042   SDValue Flag;
5043   SmallVector<SDValue, 4> RetOps(1, Chain);
5044
5045   // Copy the result values into the output registers.
5046   for (unsigned i = 0; i != RVLocs.size(); ++i) {
5047     CCValAssign &VA = RVLocs[i];
5048     assert(VA.isRegLoc() && "Can only return in registers!");
5049
5050     SDValue Arg = OutVals[i];
5051
5052     switch (VA.getLocInfo()) {
5053     default: llvm_unreachable("Unknown loc info!");
5054     case CCValAssign::Full: break;
5055     case CCValAssign::AExt:
5056       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
5057       break;
5058     case CCValAssign::ZExt:
5059       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
5060       break;
5061     case CCValAssign::SExt:
5062       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
5063       break;
5064     }
5065
5066     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
5067     Flag = Chain.getValue(1);
5068     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
5069   }
5070
5071   RetOps[0] = Chain;  // Update chain.
5072
5073   // Add the flag if we have it.
5074   if (Flag.getNode())
5075     RetOps.push_back(Flag);
5076
5077   return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
5078 }
5079
5080 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
5081                                    const PPCSubtarget &Subtarget) const {
5082   // When we pop the dynamic allocation we need to restore the SP link.
5083   SDLoc dl(Op);
5084
5085   // Get the corect type for pointers.
5086   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
5087
5088   // Construct the stack pointer operand.
5089   bool isPPC64 = Subtarget.isPPC64();
5090   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
5091   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
5092
5093   // Get the operands for the STACKRESTORE.
5094   SDValue Chain = Op.getOperand(0);
5095   SDValue SaveSP = Op.getOperand(1);
5096
5097   // Load the old link SP.
5098   SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
5099                                    MachinePointerInfo(),
5100                                    false, false, false, 0);
5101
5102   // Restore the stack pointer.
5103   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
5104
5105   // Store the old link SP.
5106   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
5107                       false, false, 0);
5108 }
5109
5110
5111
5112 SDValue
5113 PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
5114   MachineFunction &MF = DAG.getMachineFunction();
5115   bool isPPC64 = Subtarget.isPPC64();
5116   bool isDarwinABI = Subtarget.isDarwinABI();
5117   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
5118
5119   // Get current frame pointer save index.  The users of this index will be
5120   // primarily DYNALLOC instructions.
5121   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
5122   int RASI = FI->getReturnAddrSaveIndex();
5123
5124   // If the frame pointer save index hasn't been defined yet.
5125   if (!RASI) {
5126     // Find out what the fix offset of the frame pointer save area.
5127     int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
5128     // Allocate the frame index for frame pointer save area.
5129     RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true);
5130     // Save the result.
5131     FI->setReturnAddrSaveIndex(RASI);
5132   }
5133   return DAG.getFrameIndex(RASI, PtrVT);
5134 }
5135
5136 SDValue
5137 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
5138   MachineFunction &MF = DAG.getMachineFunction();
5139   bool isPPC64 = Subtarget.isPPC64();
5140   bool isDarwinABI = Subtarget.isDarwinABI();
5141   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
5142
5143   // Get current frame pointer save index.  The users of this index will be
5144   // primarily DYNALLOC instructions.
5145   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
5146   int FPSI = FI->getFramePointerSaveIndex();
5147
5148   // If the frame pointer save index hasn't been defined yet.
5149   if (!FPSI) {
5150     // Find out what the fix offset of the frame pointer save area.
5151     int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
5152                                                            isDarwinABI);
5153
5154     // Allocate the frame index for frame pointer save area.
5155     FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
5156     // Save the result.
5157     FI->setFramePointerSaveIndex(FPSI);
5158   }
5159   return DAG.getFrameIndex(FPSI, PtrVT);
5160 }
5161
5162 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
5163                                          SelectionDAG &DAG,
5164                                          const PPCSubtarget &Subtarget) const {
5165   // Get the inputs.
5166   SDValue Chain = Op.getOperand(0);
5167   SDValue Size  = Op.getOperand(1);
5168   SDLoc dl(Op);
5169
5170   // Get the corect type for pointers.
5171   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
5172   // Negate the size.
5173   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
5174                                   DAG.getConstant(0, PtrVT), Size);
5175   // Construct a node for the frame pointer save index.
5176   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
5177   // Build a DYNALLOC node.
5178   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
5179   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
5180   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
5181 }
5182
5183 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
5184                                                SelectionDAG &DAG) const {
5185   SDLoc DL(Op);
5186   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
5187                      DAG.getVTList(MVT::i32, MVT::Other),
5188                      Op.getOperand(0), Op.getOperand(1));
5189 }
5190
5191 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
5192                                                 SelectionDAG &DAG) const {
5193   SDLoc DL(Op);
5194   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
5195                      Op.getOperand(0), Op.getOperand(1));
5196 }
5197
5198 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
5199   assert(Op.getValueType() == MVT::i1 &&
5200          "Custom lowering only for i1 loads");
5201
5202   // First, load 8 bits into 32 bits, then truncate to 1 bit.
5203
5204   SDLoc dl(Op);
5205   LoadSDNode *LD = cast<LoadSDNode>(Op);
5206
5207   SDValue Chain = LD->getChain();
5208   SDValue BasePtr = LD->getBasePtr();
5209   MachineMemOperand *MMO = LD->getMemOperand();
5210
5211   SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain,
5212                                  BasePtr, MVT::i8, MMO);
5213   SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
5214
5215   SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
5216   return DAG.getMergeValues(Ops, dl);
5217 }
5218
5219 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
5220   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
5221          "Custom lowering only for i1 stores");
5222
5223   // First, zero extend to 32 bits, then use a truncating store to 8 bits.
5224
5225   SDLoc dl(Op);
5226   StoreSDNode *ST = cast<StoreSDNode>(Op);
5227
5228   SDValue Chain = ST->getChain();
5229   SDValue BasePtr = ST->getBasePtr();
5230   SDValue Value = ST->getValue();
5231   MachineMemOperand *MMO = ST->getMemOperand();
5232
5233   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value);
5234   return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
5235 }
5236
5237 // FIXME: Remove this once the ANDI glue bug is fixed:
5238 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
5239   assert(Op.getValueType() == MVT::i1 &&
5240          "Custom lowering only for i1 results");
5241
5242   SDLoc DL(Op);
5243   return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
5244                      Op.getOperand(0));
5245 }
5246
5247 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
5248 /// possible.
5249 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
5250   // Not FP? Not a fsel.
5251   if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
5252       !Op.getOperand(2).getValueType().isFloatingPoint())
5253     return Op;
5254
5255   // We might be able to do better than this under some circumstances, but in
5256   // general, fsel-based lowering of select is a finite-math-only optimization.
5257   // For more information, see section F.3 of the 2.06 ISA specification.
5258   if (!DAG.getTarget().Options.NoInfsFPMath ||
5259       !DAG.getTarget().Options.NoNaNsFPMath)
5260     return Op;
5261
5262   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5263
5264   EVT ResVT = Op.getValueType();
5265   EVT CmpVT = Op.getOperand(0).getValueType();
5266   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
5267   SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
5268   SDLoc dl(Op);
5269
5270   // If the RHS of the comparison is a 0.0, we don't need to do the
5271   // subtraction at all.
5272   SDValue Sel1;
5273   if (isFloatingPointZero(RHS))
5274     switch (CC) {
5275     default: break;       // SETUO etc aren't handled by fsel.
5276     case ISD::SETNE:
5277       std::swap(TV, FV);
5278     case ISD::SETEQ:
5279       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
5280         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
5281       Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
5282       if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
5283         Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
5284       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
5285                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
5286     case ISD::SETULT:
5287     case ISD::SETLT:
5288       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
5289     case ISD::SETOGE:
5290     case ISD::SETGE:
5291       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
5292         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
5293       return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
5294     case ISD::SETUGT:
5295     case ISD::SETGT:
5296       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
5297     case ISD::SETOLE:
5298     case ISD::SETLE:
5299       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
5300         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
5301       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
5302                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
5303     }
5304
5305   SDValue Cmp;
5306   switch (CC) {
5307   default: break;       // SETUO etc aren't handled by fsel.
5308   case ISD::SETNE:
5309     std::swap(TV, FV);
5310   case ISD::SETEQ:
5311     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
5312     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
5313       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
5314     Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
5315     if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
5316       Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
5317     return DAG.getNode(PPCISD::FSEL, dl, ResVT,
5318                        DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
5319   case ISD::SETULT:
5320   case ISD::SETLT:
5321     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
5322     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
5323       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
5324     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
5325   case ISD::SETOGE:
5326   case ISD::SETGE:
5327     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
5328     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
5329       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
5330     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
5331   case ISD::SETUGT:
5332   case ISD::SETGT:
5333     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
5334     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
5335       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
5336     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
5337   case ISD::SETOLE:
5338   case ISD::SETLE:
5339     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
5340     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
5341       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
5342     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
5343   }
5344   return Op;
5345 }
5346
5347 // FIXME: Split this code up when LegalizeDAGTypes lands.
5348 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
5349                                            SDLoc dl) const {
5350   assert(Op.getOperand(0).getValueType().isFloatingPoint());
5351   SDValue Src = Op.getOperand(0);
5352   if (Src.getValueType() == MVT::f32)
5353     Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
5354
5355   SDValue Tmp;
5356   switch (Op.getSimpleValueType().SimpleTy) {
5357   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
5358   case MVT::i32:
5359     Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
5360                         (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ :
5361                                                    PPCISD::FCTIDZ),
5362                       dl, MVT::f64, Src);
5363     break;
5364   case MVT::i64:
5365     assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
5366            "i64 FP_TO_UINT is supported only with FPCVT");
5367     Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
5368                                                         PPCISD::FCTIDUZ,
5369                       dl, MVT::f64, Src);
5370     break;
5371   }
5372
5373   // Convert the FP value to an int value through memory.
5374   bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
5375     (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
5376   SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
5377   int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
5378   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
5379
5380   // Emit a store to the stack slot.
5381   SDValue Chain;
5382   if (i32Stack) {
5383     MachineFunction &MF = DAG.getMachineFunction();
5384     MachineMemOperand *MMO =
5385       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
5386     SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
5387     Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
5388               DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
5389   } else
5390     Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
5391                          MPI, false, false, 0);
5392
5393   // Result is a load from the stack slot.  If loading 4 bytes, make sure to
5394   // add in a bias.
5395   if (Op.getValueType() == MVT::i32 && !i32Stack) {
5396     FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
5397                         DAG.getConstant(4, FIPtr.getValueType()));
5398     MPI = MachinePointerInfo();
5399   }
5400
5401   return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI,
5402                      false, false, false, 0);
5403 }
5404
5405 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
5406                                            SelectionDAG &DAG) const {
5407   SDLoc dl(Op);
5408   // Don't handle ppc_fp128 here; let it be lowered to a libcall.
5409   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
5410     return SDValue();
5411
5412   if (Op.getOperand(0).getValueType() == MVT::i1)
5413     return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
5414                        DAG.getConstantFP(1.0, Op.getValueType()),
5415                        DAG.getConstantFP(0.0, Op.getValueType()));
5416
5417   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
5418          "UINT_TO_FP is supported only with FPCVT");
5419
5420   // If we have FCFIDS, then use it when converting to single-precision.
5421   // Otherwise, convert to double-precision and then round.
5422   unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
5423                    (Op.getOpcode() == ISD::UINT_TO_FP ?
5424                     PPCISD::FCFIDUS : PPCISD::FCFIDS) :
5425                    (Op.getOpcode() == ISD::UINT_TO_FP ?
5426                     PPCISD::FCFIDU : PPCISD::FCFID);
5427   MVT      FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
5428                    MVT::f32 : MVT::f64;
5429
5430   if (Op.getOperand(0).getValueType() == MVT::i64) {
5431     SDValue SINT = Op.getOperand(0);
5432     // When converting to single-precision, we actually need to convert
5433     // to double-precision first and then round to single-precision.
5434     // To avoid double-rounding effects during that operation, we have
5435     // to prepare the input operand.  Bits that might be truncated when
5436     // converting to double-precision are replaced by a bit that won't
5437     // be lost at this stage, but is below the single-precision rounding
5438     // position.
5439     //
5440     // However, if -enable-unsafe-fp-math is in effect, accept double
5441     // rounding to avoid the extra overhead.
5442     if (Op.getValueType() == MVT::f32 &&
5443         !Subtarget.hasFPCVT() &&
5444         !DAG.getTarget().Options.UnsafeFPMath) {
5445
5446       // Twiddle input to make sure the low 11 bits are zero.  (If this
5447       // is the case, we are guaranteed the value will fit into the 53 bit
5448       // mantissa of an IEEE double-precision value without rounding.)
5449       // If any of those low 11 bits were not zero originally, make sure
5450       // bit 12 (value 2048) is set instead, so that the final rounding
5451       // to single-precision gets the correct result.
5452       SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
5453                                   SINT, DAG.getConstant(2047, MVT::i64));
5454       Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
5455                           Round, DAG.getConstant(2047, MVT::i64));
5456       Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
5457       Round = DAG.getNode(ISD::AND, dl, MVT::i64,
5458                           Round, DAG.getConstant(-2048, MVT::i64));
5459
5460       // However, we cannot use that value unconditionally: if the magnitude
5461       // of the input value is small, the bit-twiddling we did above might
5462       // end up visibly changing the output.  Fortunately, in that case, we
5463       // don't need to twiddle bits since the original input will convert
5464       // exactly to double-precision floating-point already.  Therefore,
5465       // construct a conditional to use the original value if the top 11
5466       // bits are all sign-bit copies, and use the rounded value computed
5467       // above otherwise.
5468       SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
5469                                  SINT, DAG.getConstant(53, MVT::i32));
5470       Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
5471                          Cond, DAG.getConstant(1, MVT::i64));
5472       Cond = DAG.getSetCC(dl, MVT::i32,
5473                           Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT);
5474
5475       SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
5476     }
5477
5478     SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
5479     SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
5480
5481     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
5482       FP = DAG.getNode(ISD::FP_ROUND, dl,
5483                        MVT::f32, FP, DAG.getIntPtrConstant(0));
5484     return FP;
5485   }
5486
5487   assert(Op.getOperand(0).getValueType() == MVT::i32 &&
5488          "Unhandled INT_TO_FP type in custom expander!");
5489   // Since we only generate this in 64-bit mode, we can take advantage of
5490   // 64-bit registers.  In particular, sign extend the input value into the
5491   // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
5492   // then lfd it and fcfid it.
5493   MachineFunction &MF = DAG.getMachineFunction();
5494   MachineFrameInfo *FrameInfo = MF.getFrameInfo();
5495   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
5496
5497   SDValue Ld;
5498   if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
5499     int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
5500     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
5501
5502     SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
5503                                  MachinePointerInfo::getFixedStack(FrameIdx),
5504                                  false, false, 0);
5505
5506     assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
5507            "Expected an i32 store");
5508     MachineMemOperand *MMO =
5509       MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
5510                               MachineMemOperand::MOLoad, 4, 4);
5511     SDValue Ops[] = { Store, FIdx };
5512     Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
5513                                    PPCISD::LFIWZX : PPCISD::LFIWAX,
5514                                  dl, DAG.getVTList(MVT::f64, MVT::Other),
5515                                  Ops, MVT::i32, MMO);
5516   } else {
5517     assert(Subtarget.isPPC64() &&
5518            "i32->FP without LFIWAX supported only on PPC64");
5519
5520     int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
5521     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
5522
5523     SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
5524                                 Op.getOperand(0));
5525
5526     // STD the extended value into the stack slot.
5527     SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
5528                                  MachinePointerInfo::getFixedStack(FrameIdx),
5529                                  false, false, 0);
5530
5531     // Load the value as a double.
5532     Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
5533                      MachinePointerInfo::getFixedStack(FrameIdx),
5534                      false, false, false, 0);
5535   }
5536
5537   // FCFID it and return it.
5538   SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
5539   if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
5540     FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
5541   return FP;
5542 }
5543
5544 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
5545                                             SelectionDAG &DAG) const {
5546   SDLoc dl(Op);
5547   /*
5548    The rounding mode is in bits 30:31 of FPSR, and has the following
5549    settings:
5550      00 Round to nearest
5551      01 Round to 0
5552      10 Round to +inf
5553      11 Round to -inf
5554
5555   FLT_ROUNDS, on the other hand, expects the following:
5556     -1 Undefined
5557      0 Round to 0
5558      1 Round to nearest
5559      2 Round to +inf
5560      3 Round to -inf
5561
5562   To perform the conversion, we do:
5563     ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
5564   */
5565
5566   MachineFunction &MF = DAG.getMachineFunction();
5567   EVT VT = Op.getValueType();
5568   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
5569
5570   // Save FP Control Word to register
5571   EVT NodeTys[] = {
5572     MVT::f64,    // return register
5573     MVT::Glue    // unused in this context
5574   };
5575   SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
5576
5577   // Save FP register to stack slot
5578   int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
5579   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
5580   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
5581                                StackSlot, MachinePointerInfo(), false, false,0);
5582
5583   // Load FP Control Word from low 32 bits of stack slot.
5584   SDValue Four = DAG.getConstant(4, PtrVT);
5585   SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
5586   SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
5587                             false, false, false, 0);
5588
5589   // Transform as necessary
5590   SDValue CWD1 =
5591     DAG.getNode(ISD::AND, dl, MVT::i32,
5592                 CWD, DAG.getConstant(3, MVT::i32));
5593   SDValue CWD2 =
5594     DAG.getNode(ISD::SRL, dl, MVT::i32,
5595                 DAG.getNode(ISD::AND, dl, MVT::i32,
5596                             DAG.getNode(ISD::XOR, dl, MVT::i32,
5597                                         CWD, DAG.getConstant(3, MVT::i32)),
5598                             DAG.getConstant(3, MVT::i32)),
5599                 DAG.getConstant(1, MVT::i32));
5600
5601   SDValue RetVal =
5602     DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
5603
5604   return DAG.getNode((VT.getSizeInBits() < 16 ?
5605                       ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
5606 }
5607
5608 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
5609   EVT VT = Op.getValueType();
5610   unsigned BitWidth = VT.getSizeInBits();
5611   SDLoc dl(Op);
5612   assert(Op.getNumOperands() == 3 &&
5613          VT == Op.getOperand(1).getValueType() &&
5614          "Unexpected SHL!");
5615
5616   // Expand into a bunch of logical ops.  Note that these ops
5617   // depend on the PPC behavior for oversized shift amounts.
5618   SDValue Lo = Op.getOperand(0);
5619   SDValue Hi = Op.getOperand(1);
5620   SDValue Amt = Op.getOperand(2);
5621   EVT AmtVT = Amt.getValueType();
5622
5623   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
5624                              DAG.getConstant(BitWidth, AmtVT), Amt);
5625   SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
5626   SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
5627   SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
5628   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
5629                              DAG.getConstant(-BitWidth, AmtVT));
5630   SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
5631   SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
5632   SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
5633   SDValue OutOps[] = { OutLo, OutHi };
5634   return DAG.getMergeValues(OutOps, dl);
5635 }
5636
5637 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
5638   EVT VT = Op.getValueType();
5639   SDLoc dl(Op);
5640   unsigned BitWidth = VT.getSizeInBits();
5641   assert(Op.getNumOperands() == 3 &&
5642          VT == Op.getOperand(1).getValueType() &&
5643          "Unexpected SRL!");
5644
5645   // Expand into a bunch of logical ops.  Note that these ops
5646   // depend on the PPC behavior for oversized shift amounts.
5647   SDValue Lo = Op.getOperand(0);
5648   SDValue Hi = Op.getOperand(1);
5649   SDValue Amt = Op.getOperand(2);
5650   EVT AmtVT = Amt.getValueType();
5651
5652   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
5653                              DAG.getConstant(BitWidth, AmtVT), Amt);
5654   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
5655   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
5656   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
5657   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
5658                              DAG.getConstant(-BitWidth, AmtVT));
5659   SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
5660   SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
5661   SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
5662   SDValue OutOps[] = { OutLo, OutHi };
5663   return DAG.getMergeValues(OutOps, dl);
5664 }
5665
5666 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
5667   SDLoc dl(Op);
5668   EVT VT = Op.getValueType();
5669   unsigned BitWidth = VT.getSizeInBits();
5670   assert(Op.getNumOperands() == 3 &&
5671          VT == Op.getOperand(1).getValueType() &&
5672          "Unexpected SRA!");
5673
5674   // Expand into a bunch of logical ops, followed by a select_cc.
5675   SDValue Lo = Op.getOperand(0);
5676   SDValue Hi = Op.getOperand(1);
5677   SDValue Amt = Op.getOperand(2);
5678   EVT AmtVT = Amt.getValueType();
5679
5680   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
5681                              DAG.getConstant(BitWidth, AmtVT), Amt);
5682   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
5683   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
5684   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
5685   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
5686                              DAG.getConstant(-BitWidth, AmtVT));
5687   SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
5688   SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
5689   SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
5690                                   Tmp4, Tmp6, ISD::SETLE);
5691   SDValue OutOps[] = { OutLo, OutHi };
5692   return DAG.getMergeValues(OutOps, dl);
5693 }
5694
5695 //===----------------------------------------------------------------------===//
5696 // Vector related lowering.
5697 //
5698
5699 /// BuildSplatI - Build a canonical splati of Val with an element size of
5700 /// SplatSize.  Cast the result to VT.
5701 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
5702                              SelectionDAG &DAG, SDLoc dl) {
5703   assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
5704
5705   static const EVT VTys[] = { // canonical VT to use for each size.
5706     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
5707   };
5708
5709   EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
5710
5711   // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
5712   if (Val == -1)
5713     SplatSize = 1;
5714
5715   EVT CanonicalVT = VTys[SplatSize-1];
5716
5717   // Build a canonical splat for this value.
5718   SDValue Elt = DAG.getConstant(Val, MVT::i32);
5719   SmallVector<SDValue, 8> Ops;
5720   Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
5721   SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops);
5722   return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
5723 }
5724
5725 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
5726 /// specified intrinsic ID.
5727 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op,
5728                                 SelectionDAG &DAG, SDLoc dl,
5729                                 EVT DestVT = MVT::Other) {
5730   if (DestVT == MVT::Other) DestVT = Op.getValueType();
5731   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
5732                      DAG.getConstant(IID, MVT::i32), Op);
5733 }
5734
5735 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
5736 /// specified intrinsic ID.
5737 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
5738                                 SelectionDAG &DAG, SDLoc dl,
5739                                 EVT DestVT = MVT::Other) {
5740   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
5741   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
5742                      DAG.getConstant(IID, MVT::i32), LHS, RHS);
5743 }
5744
5745 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
5746 /// specified intrinsic ID.
5747 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
5748                                 SDValue Op2, SelectionDAG &DAG,
5749                                 SDLoc dl, EVT DestVT = MVT::Other) {
5750   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
5751   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
5752                      DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
5753 }
5754
5755
5756 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
5757 /// amount.  The result has the specified value type.
5758 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
5759                              EVT VT, SelectionDAG &DAG, SDLoc dl) {
5760   // Force LHS/RHS to be the right type.
5761   LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
5762   RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
5763
5764   int Ops[16];
5765   for (unsigned i = 0; i != 16; ++i)
5766     Ops[i] = i + Amt;
5767   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
5768   return DAG.getNode(ISD::BITCAST, dl, VT, T);
5769 }
5770
5771 // If this is a case we can't handle, return null and let the default
5772 // expansion code take care of it.  If we CAN select this case, and if it
5773 // selects to a single instruction, return Op.  Otherwise, if we can codegen
5774 // this case more efficiently than a constant pool load, lower it to the
5775 // sequence of ops that should be used.
5776 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
5777                                              SelectionDAG &DAG) const {
5778   SDLoc dl(Op);
5779   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
5780   assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
5781
5782   // Check if this is a splat of a constant value.
5783   APInt APSplatBits, APSplatUndef;
5784   unsigned SplatBitSize;
5785   bool HasAnyUndefs;
5786   if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
5787                              HasAnyUndefs, 0, true) || SplatBitSize > 32)
5788     return SDValue();
5789
5790   unsigned SplatBits = APSplatBits.getZExtValue();
5791   unsigned SplatUndef = APSplatUndef.getZExtValue();
5792   unsigned SplatSize = SplatBitSize / 8;
5793
5794   // First, handle single instruction cases.
5795
5796   // All zeros?
5797   if (SplatBits == 0) {
5798     // Canonicalize all zero vectors to be v4i32.
5799     if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
5800       SDValue Z = DAG.getConstant(0, MVT::i32);
5801       Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
5802       Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
5803     }
5804     return Op;
5805   }
5806
5807   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
5808   int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
5809                     (32-SplatBitSize));
5810   if (SextVal >= -16 && SextVal <= 15)
5811     return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
5812
5813
5814   // Two instruction sequences.
5815
5816   // If this value is in the range [-32,30] and is even, use:
5817   //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
5818   // If this value is in the range [17,31] and is odd, use:
5819   //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
5820   // If this value is in the range [-31,-17] and is odd, use:
5821   //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
5822   // Note the last two are three-instruction sequences.
5823   if (SextVal >= -32 && SextVal <= 31) {
5824     // To avoid having these optimizations undone by constant folding,
5825     // we convert to a pseudo that will be expanded later into one of
5826     // the above forms.
5827     SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
5828     EVT VT = (SplatSize == 1 ? MVT::v16i8 :
5829               (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
5830     SDValue EltSize = DAG.getConstant(SplatSize, MVT::i32);
5831     SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
5832     if (VT == Op.getValueType())
5833       return RetVal;
5834     else
5835       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
5836   }
5837
5838   // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
5839   // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
5840   // for fneg/fabs.
5841   if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
5842     // Make -1 and vspltisw -1:
5843     SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
5844
5845     // Make the VSLW intrinsic, computing 0x8000_0000.
5846     SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
5847                                    OnesV, DAG, dl);
5848
5849     // xor by OnesV to invert it.
5850     Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
5851     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
5852   }
5853
5854   // The remaining cases assume either big endian element order or
5855   // a splat-size that equates to the element size of the vector
5856   // to be built.  An example that doesn't work for little endian is
5857   // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits
5858   // and a vector element size of 16 bits.  The code below will
5859   // produce the vector in big endian element order, which for little
5860   // endian is {-1, 0, -1, 0, -1, 0, -1, 0}.
5861
5862   // For now, just avoid these optimizations in that case.
5863   // FIXME: Develop correct optimizations for LE with mismatched
5864   // splat and element sizes.
5865
5866   if (Subtarget.isLittleEndian() &&
5867       SplatSize != Op.getValueType().getVectorElementType().getSizeInBits())
5868     return SDValue();
5869
5870   // Check to see if this is a wide variety of vsplti*, binop self cases.
5871   static const signed char SplatCsts[] = {
5872     -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
5873     -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
5874   };
5875
5876   for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
5877     // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
5878     // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
5879     int i = SplatCsts[idx];
5880
5881     // Figure out what shift amount will be used by altivec if shifted by i in
5882     // this splat size.
5883     unsigned TypeShiftAmt = i & (SplatBitSize-1);
5884
5885     // vsplti + shl self.
5886     if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
5887       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
5888       static const unsigned IIDs[] = { // Intrinsic to use for each size.
5889         Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
5890         Intrinsic::ppc_altivec_vslw
5891       };
5892       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
5893       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
5894     }
5895
5896     // vsplti + srl self.
5897     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
5898       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
5899       static const unsigned IIDs[] = { // Intrinsic to use for each size.
5900         Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
5901         Intrinsic::ppc_altivec_vsrw
5902       };
5903       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
5904       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
5905     }
5906
5907     // vsplti + sra self.
5908     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
5909       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
5910       static const unsigned IIDs[] = { // Intrinsic to use for each size.
5911         Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
5912         Intrinsic::ppc_altivec_vsraw
5913       };
5914       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
5915       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
5916     }
5917
5918     // vsplti + rol self.
5919     if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
5920                          ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
5921       SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
5922       static const unsigned IIDs[] = { // Intrinsic to use for each size.
5923         Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
5924         Intrinsic::ppc_altivec_vrlw
5925       };
5926       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
5927       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
5928     }
5929
5930     // t = vsplti c, result = vsldoi t, t, 1
5931     if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
5932       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
5933       return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
5934     }
5935     // t = vsplti c, result = vsldoi t, t, 2
5936     if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
5937       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
5938       return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
5939     }
5940     // t = vsplti c, result = vsldoi t, t, 3
5941     if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
5942       SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
5943       return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
5944     }
5945   }
5946
5947   return SDValue();
5948 }
5949
5950 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
5951 /// the specified operations to build the shuffle.
5952 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
5953                                       SDValue RHS, SelectionDAG &DAG,
5954                                       SDLoc dl) {
5955   unsigned OpNum = (PFEntry >> 26) & 0x0F;
5956   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
5957   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
5958
5959   enum {
5960     OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
5961     OP_VMRGHW,
5962     OP_VMRGLW,
5963     OP_VSPLTISW0,
5964     OP_VSPLTISW1,
5965     OP_VSPLTISW2,
5966     OP_VSPLTISW3,
5967     OP_VSLDOI4,
5968     OP_VSLDOI8,
5969     OP_VSLDOI12
5970   };
5971
5972   if (OpNum == OP_COPY) {
5973     if (LHSID == (1*9+2)*9+3) return LHS;
5974     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
5975     return RHS;
5976   }
5977
5978   SDValue OpLHS, OpRHS;
5979   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
5980   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
5981
5982   int ShufIdxs[16];
5983   switch (OpNum) {
5984   default: llvm_unreachable("Unknown i32 permute!");
5985   case OP_VMRGHW:
5986     ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
5987     ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
5988     ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
5989     ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
5990     break;
5991   case OP_VMRGLW:
5992     ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
5993     ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
5994     ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
5995     ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
5996     break;
5997   case OP_VSPLTISW0:
5998     for (unsigned i = 0; i != 16; ++i)
5999       ShufIdxs[i] = (i&3)+0;
6000     break;
6001   case OP_VSPLTISW1:
6002     for (unsigned i = 0; i != 16; ++i)
6003       ShufIdxs[i] = (i&3)+4;
6004     break;
6005   case OP_VSPLTISW2:
6006     for (unsigned i = 0; i != 16; ++i)
6007       ShufIdxs[i] = (i&3)+8;
6008     break;
6009   case OP_VSPLTISW3:
6010     for (unsigned i = 0; i != 16; ++i)
6011       ShufIdxs[i] = (i&3)+12;
6012     break;
6013   case OP_VSLDOI4:
6014     return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
6015   case OP_VSLDOI8:
6016     return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
6017   case OP_VSLDOI12:
6018     return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
6019   }
6020   EVT VT = OpLHS.getValueType();
6021   OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
6022   OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
6023   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
6024   return DAG.getNode(ISD::BITCAST, dl, VT, T);
6025 }
6026
6027 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
6028 /// is a shuffle we can handle in a single instruction, return it.  Otherwise,
6029 /// return the code it can be lowered into.  Worst case, it can always be
6030 /// lowered into a vperm.
6031 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
6032                                                SelectionDAG &DAG) const {
6033   SDLoc dl(Op);
6034   SDValue V1 = Op.getOperand(0);
6035   SDValue V2 = Op.getOperand(1);
6036   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
6037   EVT VT = Op.getValueType();
6038   bool isLittleEndian = Subtarget.isLittleEndian();
6039
6040   // Cases that are handled by instructions that take permute immediates
6041   // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
6042   // selected by the instruction selector.
6043   if (V2.getOpcode() == ISD::UNDEF) {
6044     if (PPC::isSplatShuffleMask(SVOp, 1) ||
6045         PPC::isSplatShuffleMask(SVOp, 2) ||
6046         PPC::isSplatShuffleMask(SVOp, 4) ||
6047         PPC::isVPKUWUMShuffleMask(SVOp, true, DAG) ||
6048         PPC::isVPKUHUMShuffleMask(SVOp, true, DAG) ||
6049         PPC::isVSLDOIShuffleMask(SVOp, true, DAG) != -1 ||
6050         PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
6051         PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
6052         PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
6053         PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
6054         PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
6055         PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) {
6056       return Op;
6057     }
6058   }
6059
6060   // Altivec has a variety of "shuffle immediates" that take two vector inputs
6061   // and produce a fixed permutation.  If any of these match, do not lower to
6062   // VPERM.
6063   unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
6064   if (PPC::isVPKUWUMShuffleMask(SVOp, false, DAG) ||
6065       PPC::isVPKUHUMShuffleMask(SVOp, false, DAG) ||
6066       PPC::isVSLDOIShuffleMask(SVOp, false, DAG) != -1 ||
6067       PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
6068       PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
6069       PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
6070       PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
6071       PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
6072       PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG))
6073     return Op;
6074
6075   // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
6076   // perfect shuffle table to emit an optimal matching sequence.
6077   ArrayRef<int> PermMask = SVOp->getMask();
6078
6079   unsigned PFIndexes[4];
6080   bool isFourElementShuffle = true;
6081   for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
6082     unsigned EltNo = 8;   // Start out undef.
6083     for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
6084       if (PermMask[i*4+j] < 0)
6085         continue;   // Undef, ignore it.
6086
6087       unsigned ByteSource = PermMask[i*4+j];
6088       if ((ByteSource & 3) != j) {
6089         isFourElementShuffle = false;
6090         break;
6091       }
6092
6093       if (EltNo == 8) {
6094         EltNo = ByteSource/4;
6095       } else if (EltNo != ByteSource/4) {
6096         isFourElementShuffle = false;
6097         break;
6098       }
6099     }
6100     PFIndexes[i] = EltNo;
6101   }
6102
6103   // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
6104   // perfect shuffle vector to determine if it is cost effective to do this as
6105   // discrete instructions, or whether we should use a vperm.
6106   // For now, we skip this for little endian until such time as we have a
6107   // little-endian perfect shuffle table.
6108   if (isFourElementShuffle && !isLittleEndian) {
6109     // Compute the index in the perfect shuffle table.
6110     unsigned PFTableIndex =
6111       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
6112
6113     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
6114     unsigned Cost  = (PFEntry >> 30);
6115
6116     // Determining when to avoid vperm is tricky.  Many things affect the cost
6117     // of vperm, particularly how many times the perm mask needs to be computed.
6118     // For example, if the perm mask can be hoisted out of a loop or is already
6119     // used (perhaps because there are multiple permutes with the same shuffle
6120     // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
6121     // the loop requires an extra register.
6122     //
6123     // As a compromise, we only emit discrete instructions if the shuffle can be
6124     // generated in 3 or fewer operations.  When we have loop information
6125     // available, if this block is within a loop, we should avoid using vperm
6126     // for 3-operation perms and use a constant pool load instead.
6127     if (Cost < 3)
6128       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
6129   }
6130
6131   // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
6132   // vector that will get spilled to the constant pool.
6133   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
6134
6135   // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
6136   // that it is in input element units, not in bytes.  Convert now.
6137
6138   // For little endian, the order of the input vectors is reversed, and
6139   // the permutation mask is complemented with respect to 31.  This is
6140   // necessary to produce proper semantics with the big-endian-biased vperm
6141   // instruction.
6142   EVT EltVT = V1.getValueType().getVectorElementType();
6143   unsigned BytesPerElement = EltVT.getSizeInBits()/8;
6144
6145   SmallVector<SDValue, 16> ResultMask;
6146   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
6147     unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
6148
6149     for (unsigned j = 0; j != BytesPerElement; ++j)
6150       if (isLittleEndian)
6151         ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement+j),
6152                                              MVT::i32));
6153       else
6154         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
6155                                              MVT::i32));
6156   }
6157
6158   SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
6159                                   ResultMask);
6160   if (isLittleEndian)
6161     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
6162                        V2, V1, VPermMask);
6163   else
6164     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
6165                        V1, V2, VPermMask);
6166 }
6167
6168 /// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
6169 /// altivec comparison.  If it is, return true and fill in Opc/isDot with
6170 /// information about the intrinsic.
6171 static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
6172                                   bool &isDot) {
6173   unsigned IntrinsicID =
6174     cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
6175   CompareOpc = -1;
6176   isDot = false;
6177   switch (IntrinsicID) {
6178   default: return false;
6179     // Comparison predicates.
6180   case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
6181   case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
6182   case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
6183   case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
6184   case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
6185   case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
6186   case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
6187   case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
6188   case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
6189   case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
6190   case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
6191   case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
6192   case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
6193
6194     // Normal Comparisons.
6195   case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; isDot = 0; break;
6196   case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; isDot = 0; break;
6197   case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc =   6; isDot = 0; break;
6198   case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc =  70; isDot = 0; break;
6199   case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; isDot = 0; break;
6200   case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; isDot = 0; break;
6201   case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; isDot = 0; break;
6202   case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; isDot = 0; break;
6203   case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; isDot = 0; break;
6204   case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; isDot = 0; break;
6205   case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; isDot = 0; break;
6206   case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; isDot = 0; break;
6207   case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; isDot = 0; break;
6208   }
6209   return true;
6210 }
6211
6212 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
6213 /// lower, do it, otherwise return null.
6214 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
6215                                                    SelectionDAG &DAG) const {
6216   // If this is a lowered altivec predicate compare, CompareOpc is set to the
6217   // opcode number of the comparison.
6218   SDLoc dl(Op);
6219   int CompareOpc;
6220   bool isDot;
6221   if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
6222     return SDValue();    // Don't custom lower most intrinsics.
6223
6224   // If this is a non-dot comparison, make the VCMP node and we are done.
6225   if (!isDot) {
6226     SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
6227                               Op.getOperand(1), Op.getOperand(2),
6228                               DAG.getConstant(CompareOpc, MVT::i32));
6229     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
6230   }
6231
6232   // Create the PPCISD altivec 'dot' comparison node.
6233   SDValue Ops[] = {
6234     Op.getOperand(2),  // LHS
6235     Op.getOperand(3),  // RHS
6236     DAG.getConstant(CompareOpc, MVT::i32)
6237   };
6238   EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
6239   SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
6240
6241   // Now that we have the comparison, emit a copy from the CR to a GPR.
6242   // This is flagged to the above dot comparison.
6243   SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
6244                                 DAG.getRegister(PPC::CR6, MVT::i32),
6245                                 CompNode.getValue(1));
6246
6247   // Unpack the result based on how the target uses it.
6248   unsigned BitNo;   // Bit # of CR6.
6249   bool InvertBit;   // Invert result?
6250   switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
6251   default:  // Can't happen, don't crash on invalid number though.
6252   case 0:   // Return the value of the EQ bit of CR6.
6253     BitNo = 0; InvertBit = false;
6254     break;
6255   case 1:   // Return the inverted value of the EQ bit of CR6.
6256     BitNo = 0; InvertBit = true;
6257     break;
6258   case 2:   // Return the value of the LT bit of CR6.
6259     BitNo = 2; InvertBit = false;
6260     break;
6261   case 3:   // Return the inverted value of the LT bit of CR6.
6262     BitNo = 2; InvertBit = true;
6263     break;
6264   }
6265
6266   // Shift the bit into the low position.
6267   Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
6268                       DAG.getConstant(8-(3-BitNo), MVT::i32));
6269   // Isolate the bit.
6270   Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
6271                       DAG.getConstant(1, MVT::i32));
6272
6273   // If we are supposed to, toggle the bit.
6274   if (InvertBit)
6275     Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
6276                         DAG.getConstant(1, MVT::i32));
6277   return Flags;
6278 }
6279
6280 SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
6281                                                   SelectionDAG &DAG) const {
6282   SDLoc dl(Op);
6283   // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int
6284   // instructions), but for smaller types, we need to first extend up to v2i32
6285   // before doing going farther.
6286   if (Op.getValueType() == MVT::v2i64) {
6287     EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
6288     if (ExtVT != MVT::v2i32) {
6289       Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
6290       Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
6291                        DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
6292                                         ExtVT.getVectorElementType(), 4)));
6293       Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
6294       Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
6295                        DAG.getValueType(MVT::v2i32));
6296     }
6297
6298     return Op;
6299   }
6300
6301   return SDValue();
6302 }
6303
6304 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
6305                                                    SelectionDAG &DAG) const {
6306   SDLoc dl(Op);
6307   // Create a stack slot that is 16-byte aligned.
6308   MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
6309   int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
6310   EVT PtrVT = getPointerTy();
6311   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
6312
6313   // Store the input value into Value#0 of the stack slot.
6314   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
6315                                Op.getOperand(0), FIdx, MachinePointerInfo(),
6316                                false, false, 0);
6317   // Load it out.
6318   return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
6319                      false, false, false, 0);
6320 }
6321
6322 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
6323   SDLoc dl(Op);
6324   if (Op.getValueType() == MVT::v4i32) {
6325     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
6326
6327     SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
6328     SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
6329
6330     SDValue RHSSwap =   // = vrlw RHS, 16
6331       BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
6332
6333     // Shrinkify inputs to v8i16.
6334     LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
6335     RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
6336     RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
6337
6338     // Low parts multiplied together, generating 32-bit results (we ignore the
6339     // top parts).
6340     SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
6341                                         LHS, RHS, DAG, dl, MVT::v4i32);
6342
6343     SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
6344                                       LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
6345     // Shift the high parts up 16 bits.
6346     HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
6347                               Neg16, DAG, dl);
6348     return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
6349   } else if (Op.getValueType() == MVT::v8i16) {
6350     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
6351
6352     SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
6353
6354     return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
6355                             LHS, RHS, Zero, DAG, dl);
6356   } else if (Op.getValueType() == MVT::v16i8) {
6357     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
6358     bool isLittleEndian = Subtarget.isLittleEndian();
6359
6360     // Multiply the even 8-bit parts, producing 16-bit sums.
6361     SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
6362                                            LHS, RHS, DAG, dl, MVT::v8i16);
6363     EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
6364
6365     // Multiply the odd 8-bit parts, producing 16-bit sums.
6366     SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
6367                                           LHS, RHS, DAG, dl, MVT::v8i16);
6368     OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
6369
6370     // Merge the results together.  Because vmuleub and vmuloub are
6371     // instructions with a big-endian bias, we must reverse the
6372     // element numbering and reverse the meaning of "odd" and "even"
6373     // when generating little endian code.
6374     int Ops[16];
6375     for (unsigned i = 0; i != 8; ++i) {
6376       if (isLittleEndian) {
6377         Ops[i*2  ] = 2*i;
6378         Ops[i*2+1] = 2*i+16;
6379       } else {
6380         Ops[i*2  ] = 2*i+1;
6381         Ops[i*2+1] = 2*i+1+16;
6382       }
6383     }
6384     if (isLittleEndian)
6385       return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
6386     else
6387       return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
6388   } else {
6389     llvm_unreachable("Unknown mul to lower!");
6390   }
6391 }
6392
6393 /// LowerOperation - Provide custom lowering hooks for some operations.
6394 ///
6395 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
6396   switch (Op.getOpcode()) {
6397   default: llvm_unreachable("Wasn't expecting to be able to lower this!");
6398   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
6399   case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
6400   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
6401   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
6402   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
6403   case ISD::SETCC:              return LowerSETCC(Op, DAG);
6404   case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
6405   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
6406   case ISD::VASTART:
6407     return LowerVASTART(Op, DAG, Subtarget);
6408
6409   case ISD::VAARG:
6410     return LowerVAARG(Op, DAG, Subtarget);
6411
6412   case ISD::VACOPY:
6413     return LowerVACOPY(Op, DAG, Subtarget);
6414
6415   case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG, Subtarget);
6416   case ISD::DYNAMIC_STACKALLOC:
6417     return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget);
6418
6419   case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
6420   case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
6421
6422   case ISD::LOAD:               return LowerLOAD(Op, DAG);
6423   case ISD::STORE:              return LowerSTORE(Op, DAG);
6424   case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
6425   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
6426   case ISD::FP_TO_UINT:
6427   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
6428                                                        SDLoc(Op));
6429   case ISD::UINT_TO_FP:
6430   case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
6431   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
6432
6433   // Lower 64-bit shifts.
6434   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
6435   case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
6436   case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
6437
6438   // Vector-related lowering.
6439   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
6440   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
6441   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
6442   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
6443   case ISD::SIGN_EXTEND_INREG:  return LowerSIGN_EXTEND_INREG(Op, DAG);
6444   case ISD::MUL:                return LowerMUL(Op, DAG);
6445
6446   // For counter-based loop handling.
6447   case ISD::INTRINSIC_W_CHAIN:  return SDValue();
6448
6449   // Frame & Return address.
6450   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
6451   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
6452   }
6453 }
6454
6455 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
6456                                            SmallVectorImpl<SDValue>&Results,
6457                                            SelectionDAG &DAG) const {
6458   const TargetMachine &TM = getTargetMachine();
6459   SDLoc dl(N);
6460   switch (N->getOpcode()) {
6461   default:
6462     llvm_unreachable("Do not know how to custom type legalize this operation!");
6463   case ISD::INTRINSIC_W_CHAIN: {
6464     if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
6465         Intrinsic::ppc_is_decremented_ctr_nonzero)
6466       break;
6467
6468     assert(N->getValueType(0) == MVT::i1 &&
6469            "Unexpected result type for CTR decrement intrinsic");
6470     EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0));
6471     SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
6472     SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
6473                                  N->getOperand(1));
6474
6475     Results.push_back(NewInt);
6476     Results.push_back(NewInt.getValue(1));
6477     break;
6478   }
6479   case ISD::VAARG: {
6480     if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
6481         || TM.getSubtarget<PPCSubtarget>().isPPC64())
6482       return;
6483
6484     EVT VT = N->getValueType(0);
6485
6486     if (VT == MVT::i64) {
6487       SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, Subtarget);
6488
6489       Results.push_back(NewNode);
6490       Results.push_back(NewNode.getValue(1));
6491     }
6492     return;
6493   }
6494   case ISD::FP_ROUND_INREG: {
6495     assert(N->getValueType(0) == MVT::ppcf128);
6496     assert(N->getOperand(0).getValueType() == MVT::ppcf128);
6497     SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
6498                              MVT::f64, N->getOperand(0),
6499                              DAG.getIntPtrConstant(0));
6500     SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
6501                              MVT::f64, N->getOperand(0),
6502                              DAG.getIntPtrConstant(1));
6503
6504     // Add the two halves of the long double in round-to-zero mode.
6505     SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
6506
6507     // We know the low half is about to be thrown away, so just use something
6508     // convenient.
6509     Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
6510                                 FPreg, FPreg));
6511     return;
6512   }
6513   case ISD::FP_TO_SINT:
6514     // LowerFP_TO_INT() can only handle f32 and f64.
6515     if (N->getOperand(0).getValueType() == MVT::ppcf128)
6516       return;
6517     Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
6518     return;
6519   }
6520 }
6521
6522
6523 //===----------------------------------------------------------------------===//
6524 //  Other Lowering Code
6525 //===----------------------------------------------------------------------===//
6526
6527 MachineBasicBlock *
6528 PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
6529                                     bool is64bit, unsigned BinOpcode) const {
6530   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
6531   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
6532
6533   const BasicBlock *LLVM_BB = BB->getBasicBlock();
6534   MachineFunction *F = BB->getParent();
6535   MachineFunction::iterator It = BB;
6536   ++It;
6537
6538   unsigned dest = MI->getOperand(0).getReg();
6539   unsigned ptrA = MI->getOperand(1).getReg();
6540   unsigned ptrB = MI->getOperand(2).getReg();
6541   unsigned incr = MI->getOperand(3).getReg();
6542   DebugLoc dl = MI->getDebugLoc();
6543
6544   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
6545   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
6546   F->insert(It, loopMBB);
6547   F->insert(It, exitMBB);
6548   exitMBB->splice(exitMBB->begin(), BB,
6549                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
6550   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
6551
6552   MachineRegisterInfo &RegInfo = F->getRegInfo();
6553   unsigned TmpReg = (!BinOpcode) ? incr :
6554     RegInfo.createVirtualRegister(
6555        is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
6556                  (const TargetRegisterClass *) &PPC::GPRCRegClass);
6557
6558   //  thisMBB:
6559   //   ...
6560   //   fallthrough --> loopMBB
6561   BB->addSuccessor(loopMBB);
6562
6563   //  loopMBB:
6564   //   l[wd]arx dest, ptr
6565   //   add r0, dest, incr
6566   //   st[wd]cx. r0, ptr
6567   //   bne- loopMBB
6568   //   fallthrough --> exitMBB
6569   BB = loopMBB;
6570   BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
6571     .addReg(ptrA).addReg(ptrB);
6572   if (BinOpcode)
6573     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
6574   BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
6575     .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
6576   BuildMI(BB, dl, TII->get(PPC::BCC))
6577     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
6578   BB->addSuccessor(loopMBB);
6579   BB->addSuccessor(exitMBB);
6580
6581   //  exitMBB:
6582   //   ...
6583   BB = exitMBB;
6584   return BB;
6585 }
6586
6587 MachineBasicBlock *
6588 PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
6589                                             MachineBasicBlock *BB,
6590                                             bool is8bit,    // operation
6591                                             unsigned BinOpcode) const {
6592   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
6593   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
6594   // In 64 bit mode we have to use 64 bits for addresses, even though the
6595   // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
6596   // registers without caring whether they're 32 or 64, but here we're
6597   // doing actual arithmetic on the addresses.
6598   bool is64bit = Subtarget.isPPC64();
6599   unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
6600
6601   const BasicBlock *LLVM_BB = BB->getBasicBlock();
6602   MachineFunction *F = BB->getParent();
6603   MachineFunction::iterator It = BB;
6604   ++It;
6605
6606   unsigned dest = MI->getOperand(0).getReg();
6607   unsigned ptrA = MI->getOperand(1).getReg();
6608   unsigned ptrB = MI->getOperand(2).getReg();
6609   unsigned incr = MI->getOperand(3).getReg();
6610   DebugLoc dl = MI->getDebugLoc();
6611
6612   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
6613   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
6614   F->insert(It, loopMBB);
6615   F->insert(It, exitMBB);
6616   exitMBB->splice(exitMBB->begin(), BB,
6617                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
6618   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
6619
6620   MachineRegisterInfo &RegInfo = F->getRegInfo();
6621   const TargetRegisterClass *RC =
6622     is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
6623               (const TargetRegisterClass *) &PPC::GPRCRegClass;
6624   unsigned PtrReg = RegInfo.createVirtualRegister(RC);
6625   unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
6626   unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
6627   unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
6628   unsigned MaskReg = RegInfo.createVirtualRegister(RC);
6629   unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
6630   unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
6631   unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
6632   unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
6633   unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
6634   unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
6635   unsigned Ptr1Reg;
6636   unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
6637
6638   //  thisMBB:
6639   //   ...
6640   //   fallthrough --> loopMBB
6641   BB->addSuccessor(loopMBB);
6642
6643   // The 4-byte load must be aligned, while a char or short may be
6644   // anywhere in the word.  Hence all this nasty bookkeeping code.
6645   //   add ptr1, ptrA, ptrB [copy if ptrA==0]
6646   //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
6647   //   xori shift, shift1, 24 [16]
6648   //   rlwinm ptr, ptr1, 0, 0, 29
6649   //   slw incr2, incr, shift
6650   //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
6651   //   slw mask, mask2, shift
6652   //  loopMBB:
6653   //   lwarx tmpDest, ptr
6654   //   add tmp, tmpDest, incr2
6655   //   andc tmp2, tmpDest, mask
6656   //   and tmp3, tmp, mask
6657   //   or tmp4, tmp3, tmp2
6658   //   stwcx. tmp4, ptr
6659   //   bne- loopMBB
6660   //   fallthrough --> exitMBB
6661   //   srw dest, tmpDest, shift
6662   if (ptrA != ZeroReg) {
6663     Ptr1Reg = RegInfo.createVirtualRegister(RC);
6664     BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
6665       .addReg(ptrA).addReg(ptrB);
6666   } else {
6667     Ptr1Reg = ptrB;
6668   }
6669   BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
6670       .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
6671   BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
6672       .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
6673   if (is64bit)
6674     BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
6675       .addReg(Ptr1Reg).addImm(0).addImm(61);
6676   else
6677     BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
6678       .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
6679   BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
6680       .addReg(incr).addReg(ShiftReg);
6681   if (is8bit)
6682     BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
6683   else {
6684     BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
6685     BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
6686   }
6687   BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
6688       .addReg(Mask2Reg).addReg(ShiftReg);
6689
6690   BB = loopMBB;
6691   BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
6692     .addReg(ZeroReg).addReg(PtrReg);
6693   if (BinOpcode)
6694     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
6695       .addReg(Incr2Reg).addReg(TmpDestReg);
6696   BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
6697     .addReg(TmpDestReg).addReg(MaskReg);
6698   BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
6699     .addReg(TmpReg).addReg(MaskReg);
6700   BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
6701     .addReg(Tmp3Reg).addReg(Tmp2Reg);
6702   BuildMI(BB, dl, TII->get(PPC::STWCX))
6703     .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
6704   BuildMI(BB, dl, TII->get(PPC::BCC))
6705     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
6706   BB->addSuccessor(loopMBB);
6707   BB->addSuccessor(exitMBB);
6708
6709   //  exitMBB:
6710   //   ...
6711   BB = exitMBB;
6712   BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
6713     .addReg(ShiftReg);
6714   return BB;
6715 }
6716
6717 llvm::MachineBasicBlock*
6718 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
6719                                     MachineBasicBlock *MBB) const {
6720   DebugLoc DL = MI->getDebugLoc();
6721   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
6722
6723   MachineFunction *MF = MBB->getParent();
6724   MachineRegisterInfo &MRI = MF->getRegInfo();
6725
6726   const BasicBlock *BB = MBB->getBasicBlock();
6727   MachineFunction::iterator I = MBB;
6728   ++I;
6729
6730   // Memory Reference
6731   MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
6732   MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
6733
6734   unsigned DstReg = MI->getOperand(0).getReg();
6735   const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
6736   assert(RC->hasType(MVT::i32) && "Invalid destination!");
6737   unsigned mainDstReg = MRI.createVirtualRegister(RC);
6738   unsigned restoreDstReg = MRI.createVirtualRegister(RC);
6739
6740   MVT PVT = getPointerTy();
6741   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
6742          "Invalid Pointer Size!");
6743   // For v = setjmp(buf), we generate
6744   //
6745   // thisMBB:
6746   //  SjLjSetup mainMBB
6747   //  bl mainMBB
6748   //  v_restore = 1
6749   //  b sinkMBB
6750   //
6751   // mainMBB:
6752   //  buf[LabelOffset] = LR
6753   //  v_main = 0
6754   //
6755   // sinkMBB:
6756   //  v = phi(main, restore)
6757   //
6758
6759   MachineBasicBlock *thisMBB = MBB;
6760   MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
6761   MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
6762   MF->insert(I, mainMBB);
6763   MF->insert(I, sinkMBB);
6764
6765   MachineInstrBuilder MIB;
6766
6767   // Transfer the remainder of BB and its successor edges to sinkMBB.
6768   sinkMBB->splice(sinkMBB->begin(), MBB,
6769                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
6770   sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
6771
6772   // Note that the structure of the jmp_buf used here is not compatible
6773   // with that used by libc, and is not designed to be. Specifically, it
6774   // stores only those 'reserved' registers that LLVM does not otherwise
6775   // understand how to spill. Also, by convention, by the time this
6776   // intrinsic is called, Clang has already stored the frame address in the
6777   // first slot of the buffer and stack address in the third. Following the
6778   // X86 target code, we'll store the jump address in the second slot. We also
6779   // need to save the TOC pointer (R2) to handle jumps between shared
6780   // libraries, and that will be stored in the fourth slot. The thread
6781   // identifier (R13) is not affected.
6782
6783   // thisMBB:
6784   const int64_t LabelOffset = 1 * PVT.getStoreSize();
6785   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
6786   const int64_t BPOffset    = 4 * PVT.getStoreSize();
6787
6788   // Prepare IP either in reg.
6789   const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
6790   unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
6791   unsigned BufReg = MI->getOperand(1).getReg();
6792
6793   if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
6794     MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
6795             .addReg(PPC::X2)
6796             .addImm(TOCOffset)
6797             .addReg(BufReg);
6798     MIB.setMemRefs(MMOBegin, MMOEnd);
6799   }
6800
6801   // Naked functions never have a base pointer, and so we use r1. For all
6802   // other functions, this decision must be delayed until during PEI.
6803   unsigned BaseReg;
6804   if (MF->getFunction()->getAttributes().hasAttribute(
6805           AttributeSet::FunctionIndex, Attribute::Naked))
6806     BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
6807   else
6808     BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
6809
6810   MIB = BuildMI(*thisMBB, MI, DL,
6811                 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
6812           .addReg(BaseReg)
6813           .addImm(BPOffset)
6814           .addReg(BufReg);
6815   MIB.setMemRefs(MMOBegin, MMOEnd);
6816
6817   // Setup
6818   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
6819   const PPCRegisterInfo *TRI =
6820     static_cast<const PPCRegisterInfo*>(getTargetMachine().getRegisterInfo());
6821   MIB.addRegMask(TRI->getNoPreservedMask());
6822
6823   BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
6824
6825   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
6826           .addMBB(mainMBB);
6827   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
6828
6829   thisMBB->addSuccessor(mainMBB, /* weight */ 0);
6830   thisMBB->addSuccessor(sinkMBB, /* weight */ 1);
6831
6832   // mainMBB:
6833   //  mainDstReg = 0
6834   MIB = BuildMI(mainMBB, DL,
6835     TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
6836
6837   // Store IP
6838   if (Subtarget.isPPC64()) {
6839     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
6840             .addReg(LabelReg)
6841             .addImm(LabelOffset)
6842             .addReg(BufReg);
6843   } else {
6844     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
6845             .addReg(LabelReg)
6846             .addImm(LabelOffset)
6847             .addReg(BufReg);
6848   }
6849
6850   MIB.setMemRefs(MMOBegin, MMOEnd);
6851
6852   BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
6853   mainMBB->addSuccessor(sinkMBB);
6854
6855   // sinkMBB:
6856   BuildMI(*sinkMBB, sinkMBB->begin(), DL,
6857           TII->get(PPC::PHI), DstReg)
6858     .addReg(mainDstReg).addMBB(mainMBB)
6859     .addReg(restoreDstReg).addMBB(thisMBB);
6860
6861   MI->eraseFromParent();
6862   return sinkMBB;
6863 }
6864
6865 MachineBasicBlock *
6866 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
6867                                      MachineBasicBlock *MBB) const {
6868   DebugLoc DL = MI->getDebugLoc();
6869   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
6870
6871   MachineFunction *MF = MBB->getParent();
6872   MachineRegisterInfo &MRI = MF->getRegInfo();
6873
6874   // Memory Reference
6875   MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
6876   MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
6877
6878   MVT PVT = getPointerTy();
6879   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
6880          "Invalid Pointer Size!");
6881
6882   const TargetRegisterClass *RC =
6883     (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6884   unsigned Tmp = MRI.createVirtualRegister(RC);
6885   // Since FP is only updated here but NOT referenced, it's treated as GPR.
6886   unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
6887   unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
6888   unsigned BP  = (PVT == MVT::i64) ? PPC::X30 :
6889                   (Subtarget.isSVR4ABI() &&
6890                    MF->getTarget().getRelocationModel() == Reloc::PIC_ ?
6891                      PPC::R29 : PPC::R30);
6892
6893   MachineInstrBuilder MIB;
6894
6895   const int64_t LabelOffset = 1 * PVT.getStoreSize();
6896   const int64_t SPOffset    = 2 * PVT.getStoreSize();
6897   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
6898   const int64_t BPOffset    = 4 * PVT.getStoreSize();
6899
6900   unsigned BufReg = MI->getOperand(0).getReg();
6901
6902   // Reload FP (the jumped-to function may not have had a
6903   // frame pointer, and if so, then its r31 will be restored
6904   // as necessary).
6905   if (PVT == MVT::i64) {
6906     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
6907             .addImm(0)
6908             .addReg(BufReg);
6909   } else {
6910     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
6911             .addImm(0)
6912             .addReg(BufReg);
6913   }
6914   MIB.setMemRefs(MMOBegin, MMOEnd);
6915
6916   // Reload IP
6917   if (PVT == MVT::i64) {
6918     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
6919             .addImm(LabelOffset)
6920             .addReg(BufReg);
6921   } else {
6922     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
6923             .addImm(LabelOffset)
6924             .addReg(BufReg);
6925   }
6926   MIB.setMemRefs(MMOBegin, MMOEnd);
6927
6928   // Reload SP
6929   if (PVT == MVT::i64) {
6930     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
6931             .addImm(SPOffset)
6932             .addReg(BufReg);
6933   } else {
6934     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
6935             .addImm(SPOffset)
6936             .addReg(BufReg);
6937   }
6938   MIB.setMemRefs(MMOBegin, MMOEnd);
6939
6940   // Reload BP
6941   if (PVT == MVT::i64) {
6942     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
6943             .addImm(BPOffset)
6944             .addReg(BufReg);
6945   } else {
6946     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
6947             .addImm(BPOffset)
6948             .addReg(BufReg);
6949   }
6950   MIB.setMemRefs(MMOBegin, MMOEnd);
6951
6952   // Reload TOC
6953   if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
6954     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
6955             .addImm(TOCOffset)
6956             .addReg(BufReg);
6957
6958     MIB.setMemRefs(MMOBegin, MMOEnd);
6959   }
6960
6961   // Jump
6962   BuildMI(*MBB, MI, DL,
6963           TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
6964   BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
6965
6966   MI->eraseFromParent();
6967   return MBB;
6968 }
6969
6970 MachineBasicBlock *
6971 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
6972                                                MachineBasicBlock *BB) const {
6973   if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
6974       MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
6975     return emitEHSjLjSetJmp(MI, BB);
6976   } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
6977              MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
6978     return emitEHSjLjLongJmp(MI, BB);
6979   }
6980
6981   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
6982
6983   // To "insert" these instructions we actually have to insert their
6984   // control-flow patterns.
6985   const BasicBlock *LLVM_BB = BB->getBasicBlock();
6986   MachineFunction::iterator It = BB;
6987   ++It;
6988
6989   MachineFunction *F = BB->getParent();
6990
6991   if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
6992                                  MI->getOpcode() == PPC::SELECT_CC_I8 ||
6993                                  MI->getOpcode() == PPC::SELECT_I4 ||
6994                                  MI->getOpcode() == PPC::SELECT_I8)) {
6995     SmallVector<MachineOperand, 2> Cond;
6996     if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
6997         MI->getOpcode() == PPC::SELECT_CC_I8)
6998       Cond.push_back(MI->getOperand(4));
6999     else
7000       Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
7001     Cond.push_back(MI->getOperand(1));
7002
7003     DebugLoc dl = MI->getDebugLoc();
7004     const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
7005     TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
7006                       Cond, MI->getOperand(2).getReg(),
7007                       MI->getOperand(3).getReg());
7008   } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
7009              MI->getOpcode() == PPC::SELECT_CC_I8 ||
7010              MI->getOpcode() == PPC::SELECT_CC_F4 ||
7011              MI->getOpcode() == PPC::SELECT_CC_F8 ||
7012              MI->getOpcode() == PPC::SELECT_CC_VRRC ||
7013              MI->getOpcode() == PPC::SELECT_I4 ||
7014              MI->getOpcode() == PPC::SELECT_I8 ||
7015              MI->getOpcode() == PPC::SELECT_F4 ||
7016              MI->getOpcode() == PPC::SELECT_F8 ||
7017              MI->getOpcode() == PPC::SELECT_VRRC) {
7018     // The incoming instruction knows the destination vreg to set, the
7019     // condition code register to branch on, the true/false values to
7020     // select between, and a branch opcode to use.
7021
7022     //  thisMBB:
7023     //  ...
7024     //   TrueVal = ...
7025     //   cmpTY ccX, r1, r2
7026     //   bCC copy1MBB
7027     //   fallthrough --> copy0MBB
7028     MachineBasicBlock *thisMBB = BB;
7029     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
7030     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
7031     DebugLoc dl = MI->getDebugLoc();
7032     F->insert(It, copy0MBB);
7033     F->insert(It, sinkMBB);
7034
7035     // Transfer the remainder of BB and its successor edges to sinkMBB.
7036     sinkMBB->splice(sinkMBB->begin(), BB,
7037                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
7038     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
7039
7040     // Next, add the true and fallthrough blocks as its successors.
7041     BB->addSuccessor(copy0MBB);
7042     BB->addSuccessor(sinkMBB);
7043
7044     if (MI->getOpcode() == PPC::SELECT_I4 ||
7045         MI->getOpcode() == PPC::SELECT_I8 ||
7046         MI->getOpcode() == PPC::SELECT_F4 ||
7047         MI->getOpcode() == PPC::SELECT_F8 ||
7048         MI->getOpcode() == PPC::SELECT_VRRC) {
7049       BuildMI(BB, dl, TII->get(PPC::BC))
7050         .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
7051     } else {
7052       unsigned SelectPred = MI->getOperand(4).getImm();
7053       BuildMI(BB, dl, TII->get(PPC::BCC))
7054         .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
7055     }
7056
7057     //  copy0MBB:
7058     //   %FalseValue = ...
7059     //   # fallthrough to sinkMBB
7060     BB = copy0MBB;
7061
7062     // Update machine-CFG edges
7063     BB->addSuccessor(sinkMBB);
7064
7065     //  sinkMBB:
7066     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
7067     //  ...
7068     BB = sinkMBB;
7069     BuildMI(*BB, BB->begin(), dl,
7070             TII->get(PPC::PHI), MI->getOperand(0).getReg())
7071       .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
7072       .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
7073   }
7074   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
7075     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
7076   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
7077     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
7078   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
7079     BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
7080   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
7081     BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
7082
7083   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
7084     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
7085   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
7086     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
7087   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
7088     BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
7089   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
7090     BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
7091
7092   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
7093     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
7094   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
7095     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
7096   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
7097     BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
7098   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
7099     BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
7100
7101   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
7102     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
7103   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
7104     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
7105   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
7106     BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
7107   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
7108     BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
7109
7110   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
7111     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
7112   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
7113     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
7114   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
7115     BB = EmitAtomicBinary(MI, BB, false, PPC::NAND);
7116   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
7117     BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8);
7118
7119   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
7120     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
7121   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
7122     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
7123   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
7124     BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
7125   else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
7126     BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
7127
7128   else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
7129     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
7130   else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
7131     BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
7132   else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
7133     BB = EmitAtomicBinary(MI, BB, false, 0);
7134   else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
7135     BB = EmitAtomicBinary(MI, BB, true, 0);
7136
7137   else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
7138            MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
7139     bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
7140
7141     unsigned dest   = MI->getOperand(0).getReg();
7142     unsigned ptrA   = MI->getOperand(1).getReg();
7143     unsigned ptrB   = MI->getOperand(2).getReg();
7144     unsigned oldval = MI->getOperand(3).getReg();
7145     unsigned newval = MI->getOperand(4).getReg();
7146     DebugLoc dl     = MI->getDebugLoc();
7147
7148     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
7149     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
7150     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
7151     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
7152     F->insert(It, loop1MBB);
7153     F->insert(It, loop2MBB);
7154     F->insert(It, midMBB);
7155     F->insert(It, exitMBB);
7156     exitMBB->splice(exitMBB->begin(), BB,
7157                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
7158     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
7159
7160     //  thisMBB:
7161     //   ...
7162     //   fallthrough --> loopMBB
7163     BB->addSuccessor(loop1MBB);
7164
7165     // loop1MBB:
7166     //   l[wd]arx dest, ptr
7167     //   cmp[wd] dest, oldval
7168     //   bne- midMBB
7169     // loop2MBB:
7170     //   st[wd]cx. newval, ptr
7171     //   bne- loopMBB
7172     //   b exitBB
7173     // midMBB:
7174     //   st[wd]cx. dest, ptr
7175     // exitBB:
7176     BB = loop1MBB;
7177     BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
7178       .addReg(ptrA).addReg(ptrB);
7179     BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
7180       .addReg(oldval).addReg(dest);
7181     BuildMI(BB, dl, TII->get(PPC::BCC))
7182       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
7183     BB->addSuccessor(loop2MBB);
7184     BB->addSuccessor(midMBB);
7185
7186     BB = loop2MBB;
7187     BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
7188       .addReg(newval).addReg(ptrA).addReg(ptrB);
7189     BuildMI(BB, dl, TII->get(PPC::BCC))
7190       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
7191     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
7192     BB->addSuccessor(loop1MBB);
7193     BB->addSuccessor(exitMBB);
7194
7195     BB = midMBB;
7196     BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
7197       .addReg(dest).addReg(ptrA).addReg(ptrB);
7198     BB->addSuccessor(exitMBB);
7199
7200     //  exitMBB:
7201     //   ...
7202     BB = exitMBB;
7203   } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
7204              MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
7205     // We must use 64-bit registers for addresses when targeting 64-bit,
7206     // since we're actually doing arithmetic on them.  Other registers
7207     // can be 32-bit.
7208     bool is64bit = Subtarget.isPPC64();
7209     bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
7210
7211     unsigned dest   = MI->getOperand(0).getReg();
7212     unsigned ptrA   = MI->getOperand(1).getReg();
7213     unsigned ptrB   = MI->getOperand(2).getReg();
7214     unsigned oldval = MI->getOperand(3).getReg();
7215     unsigned newval = MI->getOperand(4).getReg();
7216     DebugLoc dl     = MI->getDebugLoc();
7217
7218     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
7219     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
7220     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
7221     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
7222     F->insert(It, loop1MBB);
7223     F->insert(It, loop2MBB);
7224     F->insert(It, midMBB);
7225     F->insert(It, exitMBB);
7226     exitMBB->splice(exitMBB->begin(), BB,
7227                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
7228     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
7229
7230     MachineRegisterInfo &RegInfo = F->getRegInfo();
7231     const TargetRegisterClass *RC =
7232       is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
7233                 (const TargetRegisterClass *) &PPC::GPRCRegClass;
7234     unsigned PtrReg = RegInfo.createVirtualRegister(RC);
7235     unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
7236     unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
7237     unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
7238     unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
7239     unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
7240     unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
7241     unsigned MaskReg = RegInfo.createVirtualRegister(RC);
7242     unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
7243     unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
7244     unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
7245     unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
7246     unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
7247     unsigned Ptr1Reg;
7248     unsigned TmpReg = RegInfo.createVirtualRegister(RC);
7249     unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
7250     //  thisMBB:
7251     //   ...
7252     //   fallthrough --> loopMBB
7253     BB->addSuccessor(loop1MBB);
7254
7255     // The 4-byte load must be aligned, while a char or short may be
7256     // anywhere in the word.  Hence all this nasty bookkeeping code.
7257     //   add ptr1, ptrA, ptrB [copy if ptrA==0]
7258     //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
7259     //   xori shift, shift1, 24 [16]
7260     //   rlwinm ptr, ptr1, 0, 0, 29
7261     //   slw newval2, newval, shift
7262     //   slw oldval2, oldval,shift
7263     //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
7264     //   slw mask, mask2, shift
7265     //   and newval3, newval2, mask
7266     //   and oldval3, oldval2, mask
7267     // loop1MBB:
7268     //   lwarx tmpDest, ptr
7269     //   and tmp, tmpDest, mask
7270     //   cmpw tmp, oldval3
7271     //   bne- midMBB
7272     // loop2MBB:
7273     //   andc tmp2, tmpDest, mask
7274     //   or tmp4, tmp2, newval3
7275     //   stwcx. tmp4, ptr
7276     //   bne- loop1MBB
7277     //   b exitBB
7278     // midMBB:
7279     //   stwcx. tmpDest, ptr
7280     // exitBB:
7281     //   srw dest, tmpDest, shift
7282     if (ptrA != ZeroReg) {
7283       Ptr1Reg = RegInfo.createVirtualRegister(RC);
7284       BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
7285         .addReg(ptrA).addReg(ptrB);
7286     } else {
7287       Ptr1Reg = ptrB;
7288     }
7289     BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
7290         .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
7291     BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
7292         .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
7293     if (is64bit)
7294       BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
7295         .addReg(Ptr1Reg).addImm(0).addImm(61);
7296     else
7297       BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
7298         .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
7299     BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
7300         .addReg(newval).addReg(ShiftReg);
7301     BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
7302         .addReg(oldval).addReg(ShiftReg);
7303     if (is8bit)
7304       BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
7305     else {
7306       BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
7307       BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
7308         .addReg(Mask3Reg).addImm(65535);
7309     }
7310     BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
7311         .addReg(Mask2Reg).addReg(ShiftReg);
7312     BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
7313         .addReg(NewVal2Reg).addReg(MaskReg);
7314     BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
7315         .addReg(OldVal2Reg).addReg(MaskReg);
7316
7317     BB = loop1MBB;
7318     BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
7319         .addReg(ZeroReg).addReg(PtrReg);
7320     BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
7321         .addReg(TmpDestReg).addReg(MaskReg);
7322     BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
7323         .addReg(TmpReg).addReg(OldVal3Reg);
7324     BuildMI(BB, dl, TII->get(PPC::BCC))
7325         .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
7326     BB->addSuccessor(loop2MBB);
7327     BB->addSuccessor(midMBB);
7328
7329     BB = loop2MBB;
7330     BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
7331         .addReg(TmpDestReg).addReg(MaskReg);
7332     BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
7333         .addReg(Tmp2Reg).addReg(NewVal3Reg);
7334     BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
7335         .addReg(ZeroReg).addReg(PtrReg);
7336     BuildMI(BB, dl, TII->get(PPC::BCC))
7337       .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
7338     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
7339     BB->addSuccessor(loop1MBB);
7340     BB->addSuccessor(exitMBB);
7341
7342     BB = midMBB;
7343     BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
7344       .addReg(ZeroReg).addReg(PtrReg);
7345     BB->addSuccessor(exitMBB);
7346
7347     //  exitMBB:
7348     //   ...
7349     BB = exitMBB;
7350     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
7351       .addReg(ShiftReg);
7352   } else if (MI->getOpcode() == PPC::FADDrtz) {
7353     // This pseudo performs an FADD with rounding mode temporarily forced
7354     // to round-to-zero.  We emit this via custom inserter since the FPSCR
7355     // is not modeled at the SelectionDAG level.
7356     unsigned Dest = MI->getOperand(0).getReg();
7357     unsigned Src1 = MI->getOperand(1).getReg();
7358     unsigned Src2 = MI->getOperand(2).getReg();
7359     DebugLoc dl   = MI->getDebugLoc();
7360
7361     MachineRegisterInfo &RegInfo = F->getRegInfo();
7362     unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
7363
7364     // Save FPSCR value.
7365     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
7366
7367     // Set rounding mode to round-to-zero.
7368     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
7369     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
7370
7371     // Perform addition.
7372     BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
7373
7374     // Restore FPSCR value.
7375     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
7376   } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
7377              MI->getOpcode() == PPC::ANDIo_1_GT_BIT ||
7378              MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
7379              MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) {
7380     unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
7381                        MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ?
7382                       PPC::ANDIo8 : PPC::ANDIo;
7383     bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
7384                  MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8);
7385
7386     MachineRegisterInfo &RegInfo = F->getRegInfo();
7387     unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
7388                                                   &PPC::GPRCRegClass :
7389                                                   &PPC::G8RCRegClass);
7390
7391     DebugLoc dl   = MI->getDebugLoc();
7392     BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
7393       .addReg(MI->getOperand(1).getReg()).addImm(1);
7394     BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
7395             MI->getOperand(0).getReg())
7396       .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
7397   } else {
7398     llvm_unreachable("Unexpected instr type to insert");
7399   }
7400
7401   MI->eraseFromParent();   // The pseudo instruction is gone now.
7402   return BB;
7403 }
7404
7405 //===----------------------------------------------------------------------===//
7406 // Target Optimization Hooks
7407 //===----------------------------------------------------------------------===//
7408
7409 SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
7410                                                DAGCombinerInfo &DCI) const {
7411   if (DCI.isAfterLegalizeVectorOps())
7412     return SDValue();
7413
7414   EVT VT = Op.getValueType();
7415
7416   if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
7417       (VT == MVT::f64 && Subtarget.hasFRE())  ||
7418       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
7419       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
7420
7421     // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
7422     // For the reciprocal, we need to find the zero of the function:
7423     //   F(X) = A X - 1 [which has a zero at X = 1/A]
7424     //     =>
7425     //   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
7426     //     does not require additional intermediate precision]
7427
7428     // Convergence is quadratic, so we essentially double the number of digits
7429     // correct after every iteration. The minimum architected relative
7430     // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
7431     // 23 digits and double has 52 digits.
7432     int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
7433     if (VT.getScalarType() == MVT::f64)
7434       ++Iterations;
7435
7436     SelectionDAG &DAG = DCI.DAG;
7437     SDLoc dl(Op);
7438
7439     SDValue FPOne =
7440       DAG.getConstantFP(1.0, VT.getScalarType());
7441     if (VT.isVector()) {
7442       assert(VT.getVectorNumElements() == 4 &&
7443              "Unknown vector type");
7444       FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
7445                           FPOne, FPOne, FPOne, FPOne);
7446     }
7447
7448     SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op);
7449     DCI.AddToWorklist(Est.getNode());
7450
7451     // Newton iterations: Est = Est + Est (1 - Arg * Est)
7452     for (int i = 0; i < Iterations; ++i) {
7453       SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est);
7454       DCI.AddToWorklist(NewEst.getNode());
7455
7456       NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst);
7457       DCI.AddToWorklist(NewEst.getNode());
7458
7459       NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
7460       DCI.AddToWorklist(NewEst.getNode());
7461
7462       Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst);
7463       DCI.AddToWorklist(Est.getNode());
7464     }
7465
7466     return Est;
7467   }
7468
7469   return SDValue();
7470 }
7471
7472 SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
7473                                              DAGCombinerInfo &DCI) const {
7474   if (DCI.isAfterLegalizeVectorOps())
7475     return SDValue();
7476
7477   EVT VT = Op.getValueType();
7478
7479   if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
7480       (VT == MVT::f64 && Subtarget.hasFRSQRTE())  ||
7481       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
7482       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
7483
7484     // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
7485     // For the reciprocal sqrt, we need to find the zero of the function:
7486     //   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
7487     //     =>
7488     //   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
7489     // As a result, we precompute A/2 prior to the iteration loop.
7490
7491     // Convergence is quadratic, so we essentially double the number of digits
7492     // correct after every iteration. The minimum architected relative
7493     // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
7494     // 23 digits and double has 52 digits.
7495     int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
7496     if (VT.getScalarType() == MVT::f64)
7497       ++Iterations;
7498
7499     SelectionDAG &DAG = DCI.DAG;
7500     SDLoc dl(Op);
7501
7502     SDValue FPThreeHalves =
7503       DAG.getConstantFP(1.5, VT.getScalarType());
7504     if (VT.isVector()) {
7505       assert(VT.getVectorNumElements() == 4 &&
7506              "Unknown vector type");
7507       FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
7508                                   FPThreeHalves, FPThreeHalves,
7509                                   FPThreeHalves, FPThreeHalves);
7510     }
7511
7512     SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op);
7513     DCI.AddToWorklist(Est.getNode());
7514
7515     // We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that
7516     // this entire sequence requires only one FP constant.
7517     SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op);
7518     DCI.AddToWorklist(HalfArg.getNode());
7519
7520     HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op);
7521     DCI.AddToWorklist(HalfArg.getNode());
7522
7523     // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
7524     for (int i = 0; i < Iterations; ++i) {
7525       SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est);
7526       DCI.AddToWorklist(NewEst.getNode());
7527
7528       NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst);
7529       DCI.AddToWorklist(NewEst.getNode());
7530
7531       NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst);
7532       DCI.AddToWorklist(NewEst.getNode());
7533
7534       Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
7535       DCI.AddToWorklist(Est.getNode());
7536     }
7537
7538     return Est;
7539   }
7540
7541   return SDValue();
7542 }
7543
7544 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
7545                             unsigned Bytes, int Dist,
7546                             SelectionDAG &DAG) {
7547   if (VT.getSizeInBits() / 8 != Bytes)
7548     return false;
7549
7550   SDValue BaseLoc = Base->getBasePtr();
7551   if (Loc.getOpcode() == ISD::FrameIndex) {
7552     if (BaseLoc.getOpcode() != ISD::FrameIndex)
7553       return false;
7554     const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
7555     int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
7556     int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
7557     int FS  = MFI->getObjectSize(FI);
7558     int BFS = MFI->getObjectSize(BFI);
7559     if (FS != BFS || FS != (int)Bytes) return false;
7560     return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
7561   }
7562
7563   // Handle X+C
7564   if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
7565       cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
7566     return true;
7567
7568   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7569   const GlobalValue *GV1 = nullptr;
7570   const GlobalValue *GV2 = nullptr;
7571   int64_t Offset1 = 0;
7572   int64_t Offset2 = 0;
7573   bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
7574   bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
7575   if (isGA1 && isGA2 && GV1 == GV2)
7576     return Offset1 == (Offset2 + Dist*Bytes);
7577   return false;
7578 }
7579
7580 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
7581 // not enforce equality of the chain operands.
7582 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
7583                             unsigned Bytes, int Dist,
7584                             SelectionDAG &DAG) {
7585   if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
7586     EVT VT = LS->getMemoryVT();
7587     SDValue Loc = LS->getBasePtr();
7588     return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
7589   }
7590
7591   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
7592     EVT VT;
7593     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
7594     default: return false;
7595     case Intrinsic::ppc_altivec_lvx:
7596     case Intrinsic::ppc_altivec_lvxl:
7597       VT = MVT::v4i32;
7598       break;
7599     case Intrinsic::ppc_altivec_lvebx:
7600       VT = MVT::i8;
7601       break;
7602     case Intrinsic::ppc_altivec_lvehx:
7603       VT = MVT::i16;
7604       break;
7605     case Intrinsic::ppc_altivec_lvewx:
7606       VT = MVT::i32;
7607       break;
7608     }
7609
7610     return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
7611   }
7612
7613   if (N->getOpcode() == ISD::INTRINSIC_VOID) {
7614     EVT VT;
7615     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
7616     default: return false;
7617     case Intrinsic::ppc_altivec_stvx:
7618     case Intrinsic::ppc_altivec_stvxl:
7619       VT = MVT::v4i32;
7620       break;
7621     case Intrinsic::ppc_altivec_stvebx:
7622       VT = MVT::i8;
7623       break;
7624     case Intrinsic::ppc_altivec_stvehx:
7625       VT = MVT::i16;
7626       break;
7627     case Intrinsic::ppc_altivec_stvewx:
7628       VT = MVT::i32;
7629       break;
7630     }
7631
7632     return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
7633   }
7634
7635   return false;
7636 }
7637
7638 // Return true is there is a nearyby consecutive load to the one provided
7639 // (regardless of alignment). We search up and down the chain, looking though
7640 // token factors and other loads (but nothing else). As a result, a true result
7641 // indicates that it is safe to create a new consecutive load adjacent to the
7642 // load provided.
7643 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
7644   SDValue Chain = LD->getChain();
7645   EVT VT = LD->getMemoryVT();
7646
7647   SmallSet<SDNode *, 16> LoadRoots;
7648   SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
7649   SmallSet<SDNode *, 16> Visited;
7650
7651   // First, search up the chain, branching to follow all token-factor operands.
7652   // If we find a consecutive load, then we're done, otherwise, record all
7653   // nodes just above the top-level loads and token factors.
7654   while (!Queue.empty()) {
7655     SDNode *ChainNext = Queue.pop_back_val();
7656     if (!Visited.insert(ChainNext))
7657       continue;
7658
7659     if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
7660       if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
7661         return true;
7662
7663       if (!Visited.count(ChainLD->getChain().getNode()))
7664         Queue.push_back(ChainLD->getChain().getNode());
7665     } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
7666       for (const SDUse &O : ChainNext->ops())
7667         if (!Visited.count(O.getNode()))
7668           Queue.push_back(O.getNode());
7669     } else
7670       LoadRoots.insert(ChainNext);
7671   }
7672
7673   // Second, search down the chain, starting from the top-level nodes recorded
7674   // in the first phase. These top-level nodes are the nodes just above all
7675   // loads and token factors. Starting with their uses, recursively look though
7676   // all loads (just the chain uses) and token factors to find a consecutive
7677   // load.
7678   Visited.clear();
7679   Queue.clear();
7680
7681   for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
7682        IE = LoadRoots.end(); I != IE; ++I) {
7683     Queue.push_back(*I);
7684
7685     while (!Queue.empty()) {
7686       SDNode *LoadRoot = Queue.pop_back_val();
7687       if (!Visited.insert(LoadRoot))
7688         continue;
7689
7690       if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
7691         if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
7692           return true;
7693
7694       for (SDNode::use_iterator UI = LoadRoot->use_begin(),
7695            UE = LoadRoot->use_end(); UI != UE; ++UI)
7696         if (((isa<MemSDNode>(*UI) &&
7697             cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
7698             UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
7699           Queue.push_back(*UI);
7700     }
7701   }
7702
7703   return false;
7704 }
7705
7706 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
7707                                                   DAGCombinerInfo &DCI) const {
7708   SelectionDAG &DAG = DCI.DAG;
7709   SDLoc dl(N);
7710
7711   assert(Subtarget.useCRBits() &&
7712          "Expecting to be tracking CR bits");
7713   // If we're tracking CR bits, we need to be careful that we don't have:
7714   //   trunc(binary-ops(zext(x), zext(y)))
7715   // or
7716   //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
7717   // such that we're unnecessarily moving things into GPRs when it would be
7718   // better to keep them in CR bits.
7719
7720   // Note that trunc here can be an actual i1 trunc, or can be the effective
7721   // truncation that comes from a setcc or select_cc.
7722   if (N->getOpcode() == ISD::TRUNCATE &&
7723       N->getValueType(0) != MVT::i1)
7724     return SDValue();
7725
7726   if (N->getOperand(0).getValueType() != MVT::i32 &&
7727       N->getOperand(0).getValueType() != MVT::i64)
7728     return SDValue();
7729
7730   if (N->getOpcode() == ISD::SETCC ||
7731       N->getOpcode() == ISD::SELECT_CC) {
7732     // If we're looking at a comparison, then we need to make sure that the
7733     // high bits (all except for the first) don't matter the result.
7734     ISD::CondCode CC =
7735       cast<CondCodeSDNode>(N->getOperand(
7736         N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
7737     unsigned OpBits = N->getOperand(0).getValueSizeInBits();
7738
7739     if (ISD::isSignedIntSetCC(CC)) {
7740       if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
7741           DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
7742         return SDValue();
7743     } else if (ISD::isUnsignedIntSetCC(CC)) {
7744       if (!DAG.MaskedValueIsZero(N->getOperand(0),
7745                                  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
7746           !DAG.MaskedValueIsZero(N->getOperand(1),
7747                                  APInt::getHighBitsSet(OpBits, OpBits-1)))
7748         return SDValue();
7749     } else {
7750       // This is neither a signed nor an unsigned comparison, just make sure
7751       // that the high bits are equal.
7752       APInt Op1Zero, Op1One;
7753       APInt Op2Zero, Op2One;
7754       DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One);
7755       DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One);
7756
7757       // We don't really care about what is known about the first bit (if
7758       // anything), so clear it in all masks prior to comparing them.
7759       Op1Zero.clearBit(0); Op1One.clearBit(0);
7760       Op2Zero.clearBit(0); Op2One.clearBit(0);
7761
7762       if (Op1Zero != Op2Zero || Op1One != Op2One)
7763         return SDValue();
7764     }
7765   }
7766
7767   // We now know that the higher-order bits are irrelevant, we just need to
7768   // make sure that all of the intermediate operations are bit operations, and
7769   // all inputs are extensions.
7770   if (N->getOperand(0).getOpcode() != ISD::AND &&
7771       N->getOperand(0).getOpcode() != ISD::OR  &&
7772       N->getOperand(0).getOpcode() != ISD::XOR &&
7773       N->getOperand(0).getOpcode() != ISD::SELECT &&
7774       N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
7775       N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
7776       N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
7777       N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
7778       N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
7779     return SDValue();
7780
7781   if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
7782       N->getOperand(1).getOpcode() != ISD::AND &&
7783       N->getOperand(1).getOpcode() != ISD::OR  &&
7784       N->getOperand(1).getOpcode() != ISD::XOR &&
7785       N->getOperand(1).getOpcode() != ISD::SELECT &&
7786       N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
7787       N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
7788       N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
7789       N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
7790       N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
7791     return SDValue();
7792
7793   SmallVector<SDValue, 4> Inputs;
7794   SmallVector<SDValue, 8> BinOps, PromOps;
7795   SmallPtrSet<SDNode *, 16> Visited;
7796
7797   for (unsigned i = 0; i < 2; ++i) {
7798     if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
7799           N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
7800           N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
7801           N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
7802         isa<ConstantSDNode>(N->getOperand(i)))
7803       Inputs.push_back(N->getOperand(i));
7804     else
7805       BinOps.push_back(N->getOperand(i));
7806
7807     if (N->getOpcode() == ISD::TRUNCATE)
7808       break;
7809   }
7810
7811   // Visit all inputs, collect all binary operations (and, or, xor and
7812   // select) that are all fed by extensions.
7813   while (!BinOps.empty()) {
7814     SDValue BinOp = BinOps.back();
7815     BinOps.pop_back();
7816
7817     if (!Visited.insert(BinOp.getNode()))
7818       continue;
7819
7820     PromOps.push_back(BinOp);
7821
7822     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
7823       // The condition of the select is not promoted.
7824       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
7825         continue;
7826       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
7827         continue;
7828
7829       if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
7830             BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
7831             BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
7832            BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
7833           isa<ConstantSDNode>(BinOp.getOperand(i))) {
7834         Inputs.push_back(BinOp.getOperand(i));
7835       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
7836                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
7837                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
7838                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
7839                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
7840                  BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
7841                  BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
7842                  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
7843                  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
7844         BinOps.push_back(BinOp.getOperand(i));
7845       } else {
7846         // We have an input that is not an extension or another binary
7847         // operation; we'll abort this transformation.
7848         return SDValue();
7849       }
7850     }
7851   }
7852
7853   // Make sure that this is a self-contained cluster of operations (which
7854   // is not quite the same thing as saying that everything has only one
7855   // use).
7856   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
7857     if (isa<ConstantSDNode>(Inputs[i]))
7858       continue;
7859
7860     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
7861                               UE = Inputs[i].getNode()->use_end();
7862          UI != UE; ++UI) {
7863       SDNode *User = *UI;
7864       if (User != N && !Visited.count(User))
7865         return SDValue();
7866
7867       // Make sure that we're not going to promote the non-output-value
7868       // operand(s) or SELECT or SELECT_CC.
7869       // FIXME: Although we could sometimes handle this, and it does occur in
7870       // practice that one of the condition inputs to the select is also one of
7871       // the outputs, we currently can't deal with this.
7872       if (User->getOpcode() == ISD::SELECT) {
7873         if (User->getOperand(0) == Inputs[i])
7874           return SDValue();
7875       } else if (User->getOpcode() == ISD::SELECT_CC) {
7876         if (User->getOperand(0) == Inputs[i] ||
7877             User->getOperand(1) == Inputs[i])
7878           return SDValue();
7879       }
7880     }
7881   }
7882
7883   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
7884     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
7885                               UE = PromOps[i].getNode()->use_end();
7886          UI != UE; ++UI) {
7887       SDNode *User = *UI;
7888       if (User != N && !Visited.count(User))
7889         return SDValue();
7890
7891       // Make sure that we're not going to promote the non-output-value
7892       // operand(s) or SELECT or SELECT_CC.
7893       // FIXME: Although we could sometimes handle this, and it does occur in
7894       // practice that one of the condition inputs to the select is also one of
7895       // the outputs, we currently can't deal with this.
7896       if (User->getOpcode() == ISD::SELECT) {
7897         if (User->getOperand(0) == PromOps[i])
7898           return SDValue();
7899       } else if (User->getOpcode() == ISD::SELECT_CC) {
7900         if (User->getOperand(0) == PromOps[i] ||
7901             User->getOperand(1) == PromOps[i])
7902           return SDValue();
7903       }
7904     }
7905   }
7906
7907   // Replace all inputs with the extension operand.
7908   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
7909     // Constants may have users outside the cluster of to-be-promoted nodes,
7910     // and so we need to replace those as we do the promotions.
7911     if (isa<ConstantSDNode>(Inputs[i]))
7912       continue;
7913     else
7914       DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
7915   }
7916
7917   // Replace all operations (these are all the same, but have a different
7918   // (i1) return type). DAG.getNode will validate that the types of
7919   // a binary operator match, so go through the list in reverse so that
7920   // we've likely promoted both operands first. Any intermediate truncations or
7921   // extensions disappear.
7922   while (!PromOps.empty()) {
7923     SDValue PromOp = PromOps.back();
7924     PromOps.pop_back();
7925
7926     if (PromOp.getOpcode() == ISD::TRUNCATE ||
7927         PromOp.getOpcode() == ISD::SIGN_EXTEND ||
7928         PromOp.getOpcode() == ISD::ZERO_EXTEND ||
7929         PromOp.getOpcode() == ISD::ANY_EXTEND) {
7930       if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
7931           PromOp.getOperand(0).getValueType() != MVT::i1) {
7932         // The operand is not yet ready (see comment below).
7933         PromOps.insert(PromOps.begin(), PromOp);
7934         continue;
7935       }
7936
7937       SDValue RepValue = PromOp.getOperand(0);
7938       if (isa<ConstantSDNode>(RepValue))
7939         RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
7940
7941       DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
7942       continue;
7943     }
7944
7945     unsigned C;
7946     switch (PromOp.getOpcode()) {
7947     default:             C = 0; break;
7948     case ISD::SELECT:    C = 1; break;
7949     case ISD::SELECT_CC: C = 2; break;
7950     }
7951
7952     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
7953          PromOp.getOperand(C).getValueType() != MVT::i1) ||
7954         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
7955          PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
7956       // The to-be-promoted operands of this node have not yet been
7957       // promoted (this should be rare because we're going through the
7958       // list backward, but if one of the operands has several users in
7959       // this cluster of to-be-promoted nodes, it is possible).
7960       PromOps.insert(PromOps.begin(), PromOp);
7961       continue;
7962     }
7963
7964     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
7965                                 PromOp.getNode()->op_end());
7966
7967     // If there are any constant inputs, make sure they're replaced now.
7968     for (unsigned i = 0; i < 2; ++i)
7969       if (isa<ConstantSDNode>(Ops[C+i]))
7970         Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
7971
7972     DAG.ReplaceAllUsesOfValueWith(PromOp,
7973       DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
7974   }
7975
7976   // Now we're left with the initial truncation itself.
7977   if (N->getOpcode() == ISD::TRUNCATE)
7978     return N->getOperand(0);
7979
7980   // Otherwise, this is a comparison. The operands to be compared have just
7981   // changed type (to i1), but everything else is the same.
7982   return SDValue(N, 0);
7983 }
7984
7985 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
7986                                                   DAGCombinerInfo &DCI) const {
7987   SelectionDAG &DAG = DCI.DAG;
7988   SDLoc dl(N);
7989
7990   // If we're tracking CR bits, we need to be careful that we don't have:
7991   //   zext(binary-ops(trunc(x), trunc(y)))
7992   // or
7993   //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
7994   // such that we're unnecessarily moving things into CR bits that can more
7995   // efficiently stay in GPRs. Note that if we're not certain that the high
7996   // bits are set as required by the final extension, we still may need to do
7997   // some masking to get the proper behavior.
7998
7999   // This same functionality is important on PPC64 when dealing with
8000   // 32-to-64-bit extensions; these occur often when 32-bit values are used as
8001   // the return values of functions. Because it is so similar, it is handled
8002   // here as well.
8003
8004   if (N->getValueType(0) != MVT::i32 &&
8005       N->getValueType(0) != MVT::i64)
8006     return SDValue();
8007
8008   if (!((N->getOperand(0).getValueType() == MVT::i1 &&
8009         Subtarget.useCRBits()) ||
8010        (N->getOperand(0).getValueType() == MVT::i32 &&
8011         Subtarget.isPPC64())))
8012     return SDValue();
8013
8014   if (N->getOperand(0).getOpcode() != ISD::AND &&
8015       N->getOperand(0).getOpcode() != ISD::OR  &&
8016       N->getOperand(0).getOpcode() != ISD::XOR &&
8017       N->getOperand(0).getOpcode() != ISD::SELECT &&
8018       N->getOperand(0).getOpcode() != ISD::SELECT_CC)
8019     return SDValue();
8020
8021   SmallVector<SDValue, 4> Inputs;
8022   SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
8023   SmallPtrSet<SDNode *, 16> Visited;
8024
8025   // Visit all inputs, collect all binary operations (and, or, xor and
8026   // select) that are all fed by truncations.
8027   while (!BinOps.empty()) {
8028     SDValue BinOp = BinOps.back();
8029     BinOps.pop_back();
8030
8031     if (!Visited.insert(BinOp.getNode()))
8032       continue;
8033
8034     PromOps.push_back(BinOp);
8035
8036     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
8037       // The condition of the select is not promoted.
8038       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
8039         continue;
8040       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
8041         continue;
8042
8043       if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
8044           isa<ConstantSDNode>(BinOp.getOperand(i))) {
8045         Inputs.push_back(BinOp.getOperand(i));
8046       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
8047                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
8048                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
8049                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
8050                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
8051         BinOps.push_back(BinOp.getOperand(i));
8052       } else {
8053         // We have an input that is not a truncation or another binary
8054         // operation; we'll abort this transformation.
8055         return SDValue();
8056       }
8057     }
8058   }
8059
8060   // Make sure that this is a self-contained cluster of operations (which
8061   // is not quite the same thing as saying that everything has only one
8062   // use).
8063   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
8064     if (isa<ConstantSDNode>(Inputs[i]))
8065       continue;
8066
8067     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
8068                               UE = Inputs[i].getNode()->use_end();
8069          UI != UE; ++UI) {
8070       SDNode *User = *UI;
8071       if (User != N && !Visited.count(User))
8072         return SDValue();
8073
8074       // Make sure that we're not going to promote the non-output-value
8075       // operand(s) or SELECT or SELECT_CC.
8076       // FIXME: Although we could sometimes handle this, and it does occur in
8077       // practice that one of the condition inputs to the select is also one of
8078       // the outputs, we currently can't deal with this.
8079       if (User->getOpcode() == ISD::SELECT) {
8080         if (User->getOperand(0) == Inputs[i])
8081           return SDValue();
8082       } else if (User->getOpcode() == ISD::SELECT_CC) {
8083         if (User->getOperand(0) == Inputs[i] ||
8084             User->getOperand(1) == Inputs[i])
8085           return SDValue();
8086       }
8087     }
8088   }
8089
8090   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
8091     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
8092                               UE = PromOps[i].getNode()->use_end();
8093          UI != UE; ++UI) {
8094       SDNode *User = *UI;
8095       if (User != N && !Visited.count(User))
8096         return SDValue();
8097
8098       // Make sure that we're not going to promote the non-output-value
8099       // operand(s) or SELECT or SELECT_CC.
8100       // FIXME: Although we could sometimes handle this, and it does occur in
8101       // practice that one of the condition inputs to the select is also one of
8102       // the outputs, we currently can't deal with this.
8103       if (User->getOpcode() == ISD::SELECT) {
8104         if (User->getOperand(0) == PromOps[i])
8105           return SDValue();
8106       } else if (User->getOpcode() == ISD::SELECT_CC) {
8107         if (User->getOperand(0) == PromOps[i] ||
8108             User->getOperand(1) == PromOps[i])
8109           return SDValue();
8110       }
8111     }
8112   }
8113
8114   unsigned PromBits = N->getOperand(0).getValueSizeInBits();
8115   bool ReallyNeedsExt = false;
8116   if (N->getOpcode() != ISD::ANY_EXTEND) {
8117     // If all of the inputs are not already sign/zero extended, then
8118     // we'll still need to do that at the end.
8119     for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
8120       if (isa<ConstantSDNode>(Inputs[i]))
8121         continue;
8122
8123       unsigned OpBits =
8124         Inputs[i].getOperand(0).getValueSizeInBits();
8125       assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
8126
8127       if ((N->getOpcode() == ISD::ZERO_EXTEND &&
8128            !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
8129                                   APInt::getHighBitsSet(OpBits,
8130                                                         OpBits-PromBits))) ||
8131           (N->getOpcode() == ISD::SIGN_EXTEND &&
8132            DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
8133              (OpBits-(PromBits-1)))) {
8134         ReallyNeedsExt = true;
8135         break;
8136       }
8137     }
8138   }
8139
8140   // Replace all inputs, either with the truncation operand, or a
8141   // truncation or extension to the final output type.
8142   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
8143     // Constant inputs need to be replaced with the to-be-promoted nodes that
8144     // use them because they might have users outside of the cluster of
8145     // promoted nodes.
8146     if (isa<ConstantSDNode>(Inputs[i]))
8147       continue;
8148
8149     SDValue InSrc = Inputs[i].getOperand(0);
8150     if (Inputs[i].getValueType() == N->getValueType(0))
8151       DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
8152     else if (N->getOpcode() == ISD::SIGN_EXTEND)
8153       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
8154         DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
8155     else if (N->getOpcode() == ISD::ZERO_EXTEND)
8156       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
8157         DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
8158     else
8159       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
8160         DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
8161   }
8162
8163   // Replace all operations (these are all the same, but have a different
8164   // (promoted) return type). DAG.getNode will validate that the types of
8165   // a binary operator match, so go through the list in reverse so that
8166   // we've likely promoted both operands first.
8167   while (!PromOps.empty()) {
8168     SDValue PromOp = PromOps.back();
8169     PromOps.pop_back();
8170
8171     unsigned C;
8172     switch (PromOp.getOpcode()) {
8173     default:             C = 0; break;
8174     case ISD::SELECT:    C = 1; break;
8175     case ISD::SELECT_CC: C = 2; break;
8176     }
8177
8178     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
8179          PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
8180         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
8181          PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
8182       // The to-be-promoted operands of this node have not yet been
8183       // promoted (this should be rare because we're going through the
8184       // list backward, but if one of the operands has several users in
8185       // this cluster of to-be-promoted nodes, it is possible).
8186       PromOps.insert(PromOps.begin(), PromOp);
8187       continue;
8188     }
8189
8190     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
8191                                 PromOp.getNode()->op_end());
8192
8193     // If this node has constant inputs, then they'll need to be promoted here.
8194     for (unsigned i = 0; i < 2; ++i) {
8195       if (!isa<ConstantSDNode>(Ops[C+i]))
8196         continue;
8197       if (Ops[C+i].getValueType() == N->getValueType(0))
8198         continue;
8199
8200       if (N->getOpcode() == ISD::SIGN_EXTEND)
8201         Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
8202       else if (N->getOpcode() == ISD::ZERO_EXTEND)
8203         Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
8204       else
8205         Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
8206     }
8207
8208     DAG.ReplaceAllUsesOfValueWith(PromOp,
8209       DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
8210   }
8211
8212   // Now we're left with the initial extension itself.
8213   if (!ReallyNeedsExt)
8214     return N->getOperand(0);
8215
8216   // To zero extend, just mask off everything except for the first bit (in the
8217   // i1 case).
8218   if (N->getOpcode() == ISD::ZERO_EXTEND)
8219     return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
8220                        DAG.getConstant(APInt::getLowBitsSet(
8221                                          N->getValueSizeInBits(0), PromBits),
8222                                        N->getValueType(0)));
8223
8224   assert(N->getOpcode() == ISD::SIGN_EXTEND &&
8225          "Invalid extension type");
8226   EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0));
8227   SDValue ShiftCst =
8228     DAG.getConstant(N->getValueSizeInBits(0)-PromBits, ShiftAmountTy);
8229   return DAG.getNode(ISD::SRA, dl, N->getValueType(0),
8230                      DAG.getNode(ISD::SHL, dl, N->getValueType(0),
8231                                  N->getOperand(0), ShiftCst), ShiftCst);
8232 }
8233
8234 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
8235                                              DAGCombinerInfo &DCI) const {
8236   const TargetMachine &TM = getTargetMachine();
8237   SelectionDAG &DAG = DCI.DAG;
8238   SDLoc dl(N);
8239   switch (N->getOpcode()) {
8240   default: break;
8241   case PPCISD::SHL:
8242     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
8243       if (C->isNullValue())   // 0 << V -> 0.
8244         return N->getOperand(0);
8245     }
8246     break;
8247   case PPCISD::SRL:
8248     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
8249       if (C->isNullValue())   // 0 >>u V -> 0.
8250         return N->getOperand(0);
8251     }
8252     break;
8253   case PPCISD::SRA:
8254     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
8255       if (C->isNullValue() ||   //  0 >>s V -> 0.
8256           C->isAllOnesValue())    // -1 >>s V -> -1.
8257         return N->getOperand(0);
8258     }
8259     break;
8260   case ISD::SIGN_EXTEND:
8261   case ISD::ZERO_EXTEND:
8262   case ISD::ANY_EXTEND:
8263     return DAGCombineExtBoolTrunc(N, DCI);
8264   case ISD::TRUNCATE:
8265   case ISD::SETCC:
8266   case ISD::SELECT_CC:
8267     return DAGCombineTruncBoolExt(N, DCI);
8268   case ISD::FDIV: {
8269     assert(TM.Options.UnsafeFPMath &&
8270            "Reciprocal estimates require UnsafeFPMath");
8271
8272     if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
8273       SDValue RV =
8274         DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI);
8275       if (RV.getNode()) {
8276         DCI.AddToWorklist(RV.getNode());
8277         return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
8278                            N->getOperand(0), RV);
8279       }
8280     } else if (N->getOperand(1).getOpcode() == ISD::FP_EXTEND &&
8281                N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
8282       SDValue RV =
8283         DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
8284                                  DCI);
8285       if (RV.getNode()) {
8286         DCI.AddToWorklist(RV.getNode());
8287         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)),
8288                          N->getValueType(0), RV);
8289         DCI.AddToWorklist(RV.getNode());
8290         return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
8291                            N->getOperand(0), RV);
8292       }
8293     } else if (N->getOperand(1).getOpcode() == ISD::FP_ROUND &&
8294                N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
8295       SDValue RV =
8296         DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
8297                                  DCI);
8298       if (RV.getNode()) {
8299         DCI.AddToWorklist(RV.getNode());
8300         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)),
8301                          N->getValueType(0), RV,
8302                          N->getOperand(1).getOperand(1));
8303         DCI.AddToWorklist(RV.getNode());
8304         return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
8305                            N->getOperand(0), RV);
8306       }
8307     }
8308
8309     SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
8310     if (RV.getNode()) {
8311       DCI.AddToWorklist(RV.getNode());
8312       return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
8313                          N->getOperand(0), RV);
8314     }
8315
8316     }
8317     break;
8318   case ISD::FSQRT: {
8319     assert(TM.Options.UnsafeFPMath &&
8320            "Reciprocal estimates require UnsafeFPMath");
8321
8322     // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
8323     // reciprocal sqrt.
8324     SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI);
8325     if (RV.getNode()) {
8326       DCI.AddToWorklist(RV.getNode());
8327       RV = DAGCombineFastRecip(RV, DCI);
8328       if (RV.getNode()) {
8329         // Unfortunately, RV is now NaN if the input was exactly 0. Select out
8330         // this case and force the answer to 0.
8331
8332         EVT VT = RV.getValueType();
8333
8334         SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType());
8335         if (VT.isVector()) {
8336           assert(VT.getVectorNumElements() == 4 && "Unknown vector type");
8337           Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero);
8338         }
8339
8340         SDValue ZeroCmp =
8341           DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT),
8342                        N->getOperand(0), Zero, ISD::SETEQ);
8343         DCI.AddToWorklist(ZeroCmp.getNode());
8344         DCI.AddToWorklist(RV.getNode());
8345
8346         RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT,
8347                          ZeroCmp, Zero, RV);
8348         return RV;
8349       }
8350     }
8351
8352     }
8353     break;
8354   case ISD::SINT_TO_FP:
8355     if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
8356       if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
8357         // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
8358         // We allow the src/dst to be either f32/f64, but the intermediate
8359         // type must be i64.
8360         if (N->getOperand(0).getValueType() == MVT::i64 &&
8361             N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
8362           SDValue Val = N->getOperand(0).getOperand(0);
8363           if (Val.getValueType() == MVT::f32) {
8364             Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
8365             DCI.AddToWorklist(Val.getNode());
8366           }
8367
8368           Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
8369           DCI.AddToWorklist(Val.getNode());
8370           Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
8371           DCI.AddToWorklist(Val.getNode());
8372           if (N->getValueType(0) == MVT::f32) {
8373             Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
8374                               DAG.getIntPtrConstant(0));
8375             DCI.AddToWorklist(Val.getNode());
8376           }
8377           return Val;
8378         } else if (N->getOperand(0).getValueType() == MVT::i32) {
8379           // If the intermediate type is i32, we can avoid the load/store here
8380           // too.
8381         }
8382       }
8383     }
8384     break;
8385   case ISD::STORE:
8386     // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
8387     if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
8388         !cast<StoreSDNode>(N)->isTruncatingStore() &&
8389         N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
8390         N->getOperand(1).getValueType() == MVT::i32 &&
8391         N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
8392       SDValue Val = N->getOperand(1).getOperand(0);
8393       if (Val.getValueType() == MVT::f32) {
8394         Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
8395         DCI.AddToWorklist(Val.getNode());
8396       }
8397       Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
8398       DCI.AddToWorklist(Val.getNode());
8399
8400       SDValue Ops[] = {
8401         N->getOperand(0), Val, N->getOperand(2),
8402         DAG.getValueType(N->getOperand(1).getValueType())
8403       };
8404
8405       Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8406               DAG.getVTList(MVT::Other), Ops,
8407               cast<StoreSDNode>(N)->getMemoryVT(),
8408               cast<StoreSDNode>(N)->getMemOperand());
8409       DCI.AddToWorklist(Val.getNode());
8410       return Val;
8411     }
8412
8413     // Turn STORE (BSWAP) -> sthbrx/stwbrx.
8414     if (cast<StoreSDNode>(N)->isUnindexed() &&
8415         N->getOperand(1).getOpcode() == ISD::BSWAP &&
8416         N->getOperand(1).getNode()->hasOneUse() &&
8417         (N->getOperand(1).getValueType() == MVT::i32 ||
8418          N->getOperand(1).getValueType() == MVT::i16 ||
8419          (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
8420           TM.getSubtarget<PPCSubtarget>().isPPC64() &&
8421           N->getOperand(1).getValueType() == MVT::i64))) {
8422       SDValue BSwapOp = N->getOperand(1).getOperand(0);
8423       // Do an any-extend to 32-bits if this is a half-word input.
8424       if (BSwapOp.getValueType() == MVT::i16)
8425         BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
8426
8427       SDValue Ops[] = {
8428         N->getOperand(0), BSwapOp, N->getOperand(2),
8429         DAG.getValueType(N->getOperand(1).getValueType())
8430       };
8431       return
8432         DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
8433                                 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
8434                                 cast<StoreSDNode>(N)->getMemOperand());
8435     }
8436     break;
8437   case ISD::LOAD: {
8438     LoadSDNode *LD = cast<LoadSDNode>(N);
8439     EVT VT = LD->getValueType(0);
8440     Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
8441     unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
8442     if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
8443         TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
8444         (VT == MVT::v16i8 || VT == MVT::v8i16 ||
8445          VT == MVT::v4i32 || VT == MVT::v4f32) &&
8446         LD->getAlignment() < ABIAlignment) {
8447       // This is a type-legal unaligned Altivec load.
8448       SDValue Chain = LD->getChain();
8449       SDValue Ptr = LD->getBasePtr();
8450       bool isLittleEndian = Subtarget.isLittleEndian();
8451
8452       // This implements the loading of unaligned vectors as described in
8453       // the venerable Apple Velocity Engine overview. Specifically:
8454       // https://developer.apple.com/hardwaredrivers/ve/alignment.html
8455       // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
8456       //
8457       // The general idea is to expand a sequence of one or more unaligned
8458       // loads into an alignment-based permutation-control instruction (lvsl
8459       // or lvsr), a series of regular vector loads (which always truncate
8460       // their input address to an aligned address), and a series of
8461       // permutations.  The results of these permutations are the requested
8462       // loaded values.  The trick is that the last "extra" load is not taken
8463       // from the address you might suspect (sizeof(vector) bytes after the
8464       // last requested load), but rather sizeof(vector) - 1 bytes after the
8465       // last requested vector. The point of this is to avoid a page fault if
8466       // the base address happened to be aligned. This works because if the
8467       // base address is aligned, then adding less than a full vector length
8468       // will cause the last vector in the sequence to be (re)loaded.
8469       // Otherwise, the next vector will be fetched as you might suspect was
8470       // necessary.
8471
8472       // We might be able to reuse the permutation generation from
8473       // a different base address offset from this one by an aligned amount.
8474       // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
8475       // optimization later.
8476       Intrinsic::ID Intr = (isLittleEndian ?
8477                             Intrinsic::ppc_altivec_lvsr :
8478                             Intrinsic::ppc_altivec_lvsl);
8479       SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8);
8480
8481       // Create the new MMO for the new base load. It is like the original MMO,
8482       // but represents an area in memory almost twice the vector size centered
8483       // on the original address. If the address is unaligned, we might start
8484       // reading up to (sizeof(vector)-1) bytes below the address of the
8485       // original unaligned load.
8486       MachineFunction &MF = DAG.getMachineFunction();
8487       MachineMemOperand *BaseMMO =
8488         MF.getMachineMemOperand(LD->getMemOperand(),
8489                                 -LD->getMemoryVT().getStoreSize()+1,
8490                                 2*LD->getMemoryVT().getStoreSize()-1);
8491
8492       // Create the new base load.
8493       SDValue LDXIntID = DAG.getTargetConstant(Intrinsic::ppc_altivec_lvx,
8494                                                getPointerTy());
8495       SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
8496       SDValue BaseLoad =
8497         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
8498                                 DAG.getVTList(MVT::v4i32, MVT::Other),
8499                                 BaseLoadOps, MVT::v4i32, BaseMMO);
8500
8501       // Note that the value of IncOffset (which is provided to the next
8502       // load's pointer info offset value, and thus used to calculate the
8503       // alignment), and the value of IncValue (which is actually used to
8504       // increment the pointer value) are different! This is because we
8505       // require the next load to appear to be aligned, even though it
8506       // is actually offset from the base pointer by a lesser amount.
8507       int IncOffset = VT.getSizeInBits() / 8;
8508       int IncValue = IncOffset;
8509
8510       // Walk (both up and down) the chain looking for another load at the real
8511       // (aligned) offset (the alignment of the other load does not matter in
8512       // this case). If found, then do not use the offset reduction trick, as
8513       // that will prevent the loads from being later combined (as they would
8514       // otherwise be duplicates).
8515       if (!findConsecutiveLoad(LD, DAG))
8516         --IncValue;
8517
8518       SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
8519       Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
8520
8521       MachineMemOperand *ExtraMMO =
8522         MF.getMachineMemOperand(LD->getMemOperand(),
8523                                 1, 2*LD->getMemoryVT().getStoreSize()-1);
8524       SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
8525       SDValue ExtraLoad =
8526         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
8527                                 DAG.getVTList(MVT::v4i32, MVT::Other),
8528                                 ExtraLoadOps, MVT::v4i32, ExtraMMO);
8529
8530       SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8531         BaseLoad.getValue(1), ExtraLoad.getValue(1));
8532
8533       // Because vperm has a big-endian bias, we must reverse the order
8534       // of the input vectors and complement the permute control vector
8535       // when generating little endian code.  We have already handled the
8536       // latter by using lvsr instead of lvsl, so just reverse BaseLoad
8537       // and ExtraLoad here.
8538       SDValue Perm;
8539       if (isLittleEndian)
8540         Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
8541                                 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
8542       else
8543         Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
8544                                 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
8545
8546       if (VT != MVT::v4i32)
8547         Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
8548
8549       // The output of the permutation is our loaded result, the TokenFactor is
8550       // our new chain.
8551       DCI.CombineTo(N, Perm, TF);
8552       return SDValue(N, 0);
8553     }
8554     }
8555     break;
8556   case ISD::INTRINSIC_WO_CHAIN: {
8557     bool isLittleEndian = Subtarget.isLittleEndian();
8558     Intrinsic::ID Intr = (isLittleEndian ?
8559                           Intrinsic::ppc_altivec_lvsr :
8560                           Intrinsic::ppc_altivec_lvsl);
8561     if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == Intr &&
8562         N->getOperand(1)->getOpcode() == ISD::ADD) {
8563       SDValue Add = N->getOperand(1);
8564
8565       if (DAG.MaskedValueIsZero(Add->getOperand(1),
8566             APInt::getAllOnesValue(4 /* 16 byte alignment */).zext(
8567               Add.getValueType().getScalarType().getSizeInBits()))) {
8568         SDNode *BasePtr = Add->getOperand(0).getNode();
8569         for (SDNode::use_iterator UI = BasePtr->use_begin(),
8570              UE = BasePtr->use_end(); UI != UE; ++UI) {
8571           if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
8572               cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
8573                 Intr) {
8574             // We've found another LVSL/LVSR, and this address is an aligned
8575             // multiple of that one. The results will be the same, so use the
8576             // one we've just found instead.
8577
8578             return SDValue(*UI, 0);
8579           }
8580         }
8581       }
8582     }
8583     }
8584
8585     break;
8586   case ISD::BSWAP:
8587     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
8588     if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
8589         N->getOperand(0).hasOneUse() &&
8590         (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
8591          (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
8592           TM.getSubtarget<PPCSubtarget>().isPPC64() &&
8593           N->getValueType(0) == MVT::i64))) {
8594       SDValue Load = N->getOperand(0);
8595       LoadSDNode *LD = cast<LoadSDNode>(Load);
8596       // Create the byte-swapping load.
8597       SDValue Ops[] = {
8598         LD->getChain(),    // Chain
8599         LD->getBasePtr(),  // Ptr
8600         DAG.getValueType(N->getValueType(0)) // VT
8601       };
8602       SDValue BSLoad =
8603         DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
8604                                 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
8605                                               MVT::i64 : MVT::i32, MVT::Other),
8606                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
8607
8608       // If this is an i16 load, insert the truncate.
8609       SDValue ResVal = BSLoad;
8610       if (N->getValueType(0) == MVT::i16)
8611         ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
8612
8613       // First, combine the bswap away.  This makes the value produced by the
8614       // load dead.
8615       DCI.CombineTo(N, ResVal);
8616
8617       // Next, combine the load away, we give it a bogus result value but a real
8618       // chain result.  The result value is dead because the bswap is dead.
8619       DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
8620
8621       // Return N so it doesn't get rechecked!
8622       return SDValue(N, 0);
8623     }
8624
8625     break;
8626   case PPCISD::VCMP: {
8627     // If a VCMPo node already exists with exactly the same operands as this
8628     // node, use its result instead of this node (VCMPo computes both a CR6 and
8629     // a normal output).
8630     //
8631     if (!N->getOperand(0).hasOneUse() &&
8632         !N->getOperand(1).hasOneUse() &&
8633         !N->getOperand(2).hasOneUse()) {
8634
8635       // Scan all of the users of the LHS, looking for VCMPo's that match.
8636       SDNode *VCMPoNode = nullptr;
8637
8638       SDNode *LHSN = N->getOperand(0).getNode();
8639       for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
8640            UI != E; ++UI)
8641         if (UI->getOpcode() == PPCISD::VCMPo &&
8642             UI->getOperand(1) == N->getOperand(1) &&
8643             UI->getOperand(2) == N->getOperand(2) &&
8644             UI->getOperand(0) == N->getOperand(0)) {
8645           VCMPoNode = *UI;
8646           break;
8647         }
8648
8649       // If there is no VCMPo node, or if the flag value has a single use, don't
8650       // transform this.
8651       if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
8652         break;
8653
8654       // Look at the (necessarily single) use of the flag value.  If it has a
8655       // chain, this transformation is more complex.  Note that multiple things
8656       // could use the value result, which we should ignore.
8657       SDNode *FlagUser = nullptr;
8658       for (SDNode::use_iterator UI = VCMPoNode->use_begin();
8659            FlagUser == nullptr; ++UI) {
8660         assert(UI != VCMPoNode->use_end() && "Didn't find user!");
8661         SDNode *User = *UI;
8662         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
8663           if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
8664             FlagUser = User;
8665             break;
8666           }
8667         }
8668       }
8669
8670       // If the user is a MFOCRF instruction, we know this is safe.
8671       // Otherwise we give up for right now.
8672       if (FlagUser->getOpcode() == PPCISD::MFOCRF)
8673         return SDValue(VCMPoNode, 0);
8674     }
8675     break;
8676   }
8677   case ISD::BRCOND: {
8678     SDValue Cond = N->getOperand(1);
8679     SDValue Target = N->getOperand(2);
8680
8681     if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
8682         cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
8683           Intrinsic::ppc_is_decremented_ctr_nonzero) {
8684
8685       // We now need to make the intrinsic dead (it cannot be instruction
8686       // selected).
8687       DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
8688       assert(Cond.getNode()->hasOneUse() &&
8689              "Counter decrement has more than one use");
8690
8691       return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
8692                          N->getOperand(0), Target);
8693     }
8694   }
8695   break;
8696   case ISD::BR_CC: {
8697     // If this is a branch on an altivec predicate comparison, lower this so
8698     // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
8699     // lowering is done pre-legalize, because the legalizer lowers the predicate
8700     // compare down to code that is difficult to reassemble.
8701     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
8702     SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
8703
8704     // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
8705     // value. If so, pass-through the AND to get to the intrinsic.
8706     if (LHS.getOpcode() == ISD::AND &&
8707         LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
8708         cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
8709           Intrinsic::ppc_is_decremented_ctr_nonzero &&
8710         isa<ConstantSDNode>(LHS.getOperand(1)) &&
8711         !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()->
8712           isZero())
8713       LHS = LHS.getOperand(0);
8714
8715     if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
8716         cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
8717           Intrinsic::ppc_is_decremented_ctr_nonzero &&
8718         isa<ConstantSDNode>(RHS)) {
8719       assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
8720              "Counter decrement comparison is not EQ or NE");
8721
8722       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
8723       bool isBDNZ = (CC == ISD::SETEQ && Val) ||
8724                     (CC == ISD::SETNE && !Val);
8725
8726       // We now need to make the intrinsic dead (it cannot be instruction
8727       // selected).
8728       DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
8729       assert(LHS.getNode()->hasOneUse() &&
8730              "Counter decrement has more than one use");
8731
8732       return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
8733                          N->getOperand(0), N->getOperand(4));
8734     }
8735
8736     int CompareOpc;
8737     bool isDot;
8738
8739     if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
8740         isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
8741         getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
8742       assert(isDot && "Can't compare against a vector result!");
8743
8744       // If this is a comparison against something other than 0/1, then we know
8745       // that the condition is never/always true.
8746       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
8747       if (Val != 0 && Val != 1) {
8748         if (CC == ISD::SETEQ)      // Cond never true, remove branch.
8749           return N->getOperand(0);
8750         // Always !=, turn it into an unconditional branch.
8751         return DAG.getNode(ISD::BR, dl, MVT::Other,
8752                            N->getOperand(0), N->getOperand(4));
8753       }
8754
8755       bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
8756
8757       // Create the PPCISD altivec 'dot' comparison node.
8758       SDValue Ops[] = {
8759         LHS.getOperand(2),  // LHS of compare
8760         LHS.getOperand(3),  // RHS of compare
8761         DAG.getConstant(CompareOpc, MVT::i32)
8762       };
8763       EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
8764       SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
8765
8766       // Unpack the result based on how the target uses it.
8767       PPC::Predicate CompOpc;
8768       switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
8769       default:  // Can't happen, don't crash on invalid number though.
8770       case 0:   // Branch on the value of the EQ bit of CR6.
8771         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
8772         break;
8773       case 1:   // Branch on the inverted value of the EQ bit of CR6.
8774         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
8775         break;
8776       case 2:   // Branch on the value of the LT bit of CR6.
8777         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
8778         break;
8779       case 3:   // Branch on the inverted value of the LT bit of CR6.
8780         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
8781         break;
8782       }
8783
8784       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
8785                          DAG.getConstant(CompOpc, MVT::i32),
8786                          DAG.getRegister(PPC::CR6, MVT::i32),
8787                          N->getOperand(4), CompNode.getValue(1));
8788     }
8789     break;
8790   }
8791   }
8792
8793   return SDValue();
8794 }
8795
8796 //===----------------------------------------------------------------------===//
8797 // Inline Assembly Support
8798 //===----------------------------------------------------------------------===//
8799
8800 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
8801                                                       APInt &KnownZero,
8802                                                       APInt &KnownOne,
8803                                                       const SelectionDAG &DAG,
8804                                                       unsigned Depth) const {
8805   KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
8806   switch (Op.getOpcode()) {
8807   default: break;
8808   case PPCISD::LBRX: {
8809     // lhbrx is known to have the top bits cleared out.
8810     if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
8811       KnownZero = 0xFFFF0000;
8812     break;
8813   }
8814   case ISD::INTRINSIC_WO_CHAIN: {
8815     switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
8816     default: break;
8817     case Intrinsic::ppc_altivec_vcmpbfp_p:
8818     case Intrinsic::ppc_altivec_vcmpeqfp_p:
8819     case Intrinsic::ppc_altivec_vcmpequb_p:
8820     case Intrinsic::ppc_altivec_vcmpequh_p:
8821     case Intrinsic::ppc_altivec_vcmpequw_p:
8822     case Intrinsic::ppc_altivec_vcmpgefp_p:
8823     case Intrinsic::ppc_altivec_vcmpgtfp_p:
8824     case Intrinsic::ppc_altivec_vcmpgtsb_p:
8825     case Intrinsic::ppc_altivec_vcmpgtsh_p:
8826     case Intrinsic::ppc_altivec_vcmpgtsw_p:
8827     case Intrinsic::ppc_altivec_vcmpgtub_p:
8828     case Intrinsic::ppc_altivec_vcmpgtuh_p:
8829     case Intrinsic::ppc_altivec_vcmpgtuw_p:
8830       KnownZero = ~1U;  // All bits but the low one are known to be zero.
8831       break;
8832     }
8833   }
8834   }
8835 }
8836
8837
8838 /// getConstraintType - Given a constraint, return the type of
8839 /// constraint it is for this target.
8840 PPCTargetLowering::ConstraintType
8841 PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
8842   if (Constraint.size() == 1) {
8843     switch (Constraint[0]) {
8844     default: break;
8845     case 'b':
8846     case 'r':
8847     case 'f':
8848     case 'v':
8849     case 'y':
8850       return C_RegisterClass;
8851     case 'Z':
8852       // FIXME: While Z does indicate a memory constraint, it specifically
8853       // indicates an r+r address (used in conjunction with the 'y' modifier
8854       // in the replacement string). Currently, we're forcing the base
8855       // register to be r0 in the asm printer (which is interpreted as zero)
8856       // and forming the complete address in the second register. This is
8857       // suboptimal.
8858       return C_Memory;
8859     }
8860   } else if (Constraint == "wc") { // individual CR bits.
8861     return C_RegisterClass;
8862   } else if (Constraint == "wa" || Constraint == "wd" ||
8863              Constraint == "wf" || Constraint == "ws") {
8864     return C_RegisterClass; // VSX registers.
8865   }
8866   return TargetLowering::getConstraintType(Constraint);
8867 }
8868
8869 /// Examine constraint type and operand type and determine a weight value.
8870 /// This object must already have been set up with the operand type
8871 /// and the current alternative constraint selected.
8872 TargetLowering::ConstraintWeight
8873 PPCTargetLowering::getSingleConstraintMatchWeight(
8874     AsmOperandInfo &info, const char *constraint) const {
8875   ConstraintWeight weight = CW_Invalid;
8876   Value *CallOperandVal = info.CallOperandVal;
8877     // If we don't have a value, we can't do a match,
8878     // but allow it at the lowest weight.
8879   if (!CallOperandVal)
8880     return CW_Default;
8881   Type *type = CallOperandVal->getType();
8882
8883   // Look at the constraint type.
8884   if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
8885     return CW_Register; // an individual CR bit.
8886   else if ((StringRef(constraint) == "wa" ||
8887             StringRef(constraint) == "wd" ||
8888             StringRef(constraint) == "wf") &&
8889            type->isVectorTy())
8890     return CW_Register;
8891   else if (StringRef(constraint) == "ws" && type->isDoubleTy())
8892     return CW_Register;
8893
8894   switch (*constraint) {
8895   default:
8896     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
8897     break;
8898   case 'b':
8899     if (type->isIntegerTy())
8900       weight = CW_Register;
8901     break;
8902   case 'f':
8903     if (type->isFloatTy())
8904       weight = CW_Register;
8905     break;
8906   case 'd':
8907     if (type->isDoubleTy())
8908       weight = CW_Register;
8909     break;
8910   case 'v':
8911     if (type->isVectorTy())
8912       weight = CW_Register;
8913     break;
8914   case 'y':
8915     weight = CW_Register;
8916     break;
8917   case 'Z':
8918     weight = CW_Memory;
8919     break;
8920   }
8921   return weight;
8922 }
8923
8924 std::pair<unsigned, const TargetRegisterClass*>
8925 PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
8926                                                 MVT VT) const {
8927   if (Constraint.size() == 1) {
8928     // GCC RS6000 Constraint Letters
8929     switch (Constraint[0]) {
8930     case 'b':   // R1-R31
8931       if (VT == MVT::i64 && Subtarget.isPPC64())
8932         return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
8933       return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
8934     case 'r':   // R0-R31
8935       if (VT == MVT::i64 && Subtarget.isPPC64())
8936         return std::make_pair(0U, &PPC::G8RCRegClass);
8937       return std::make_pair(0U, &PPC::GPRCRegClass);
8938     case 'f':
8939       if (VT == MVT::f32 || VT == MVT::i32)
8940         return std::make_pair(0U, &PPC::F4RCRegClass);
8941       if (VT == MVT::f64 || VT == MVT::i64)
8942         return std::make_pair(0U, &PPC::F8RCRegClass);
8943       break;
8944     case 'v':
8945       return std::make_pair(0U, &PPC::VRRCRegClass);
8946     case 'y':   // crrc
8947       return std::make_pair(0U, &PPC::CRRCRegClass);
8948     }
8949   } else if (Constraint == "wc") { // an individual CR bit.
8950     return std::make_pair(0U, &PPC::CRBITRCRegClass);
8951   } else if (Constraint == "wa" || Constraint == "wd" ||
8952              Constraint == "wf") {
8953     return std::make_pair(0U, &PPC::VSRCRegClass);
8954   } else if (Constraint == "ws") {
8955     return std::make_pair(0U, &PPC::VSFRCRegClass);
8956   }
8957
8958   std::pair<unsigned, const TargetRegisterClass*> R =
8959     TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
8960
8961   // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
8962   // (which we call X[0-9]+). If a 64-bit value has been requested, and a
8963   // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
8964   // register.
8965   // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
8966   // the AsmName field from *RegisterInfo.td, then this would not be necessary.
8967   if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
8968       PPC::GPRCRegClass.contains(R.first)) {
8969     const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
8970     return std::make_pair(TRI->getMatchingSuperReg(R.first,
8971                             PPC::sub_32, &PPC::G8RCRegClass),
8972                           &PPC::G8RCRegClass);
8973   }
8974
8975   return R;
8976 }
8977
8978
8979 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
8980 /// vector.  If it is invalid, don't add anything to Ops.
8981 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
8982                                                      std::string &Constraint,
8983                                                      std::vector<SDValue>&Ops,
8984                                                      SelectionDAG &DAG) const {
8985   SDValue Result;
8986
8987   // Only support length 1 constraints.
8988   if (Constraint.length() > 1) return;
8989
8990   char Letter = Constraint[0];
8991   switch (Letter) {
8992   default: break;
8993   case 'I':
8994   case 'J':
8995   case 'K':
8996   case 'L':
8997   case 'M':
8998   case 'N':
8999   case 'O':
9000   case 'P': {
9001     ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
9002     if (!CST) return; // Must be an immediate to match.
9003     unsigned Value = CST->getZExtValue();
9004     switch (Letter) {
9005     default: llvm_unreachable("Unknown constraint letter!");
9006     case 'I':  // "I" is a signed 16-bit constant.
9007       if ((short)Value == (int)Value)
9008         Result = DAG.getTargetConstant(Value, Op.getValueType());
9009       break;
9010     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
9011     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
9012       if ((short)Value == 0)
9013         Result = DAG.getTargetConstant(Value, Op.getValueType());
9014       break;
9015     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
9016       if ((Value >> 16) == 0)
9017         Result = DAG.getTargetConstant(Value, Op.getValueType());
9018       break;
9019     case 'M':  // "M" is a constant that is greater than 31.
9020       if (Value > 31)
9021         Result = DAG.getTargetConstant(Value, Op.getValueType());
9022       break;
9023     case 'N':  // "N" is a positive constant that is an exact power of two.
9024       if ((int)Value > 0 && isPowerOf2_32(Value))
9025         Result = DAG.getTargetConstant(Value, Op.getValueType());
9026       break;
9027     case 'O':  // "O" is the constant zero.
9028       if (Value == 0)
9029         Result = DAG.getTargetConstant(Value, Op.getValueType());
9030       break;
9031     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
9032       if ((short)-Value == (int)-Value)
9033         Result = DAG.getTargetConstant(Value, Op.getValueType());
9034       break;
9035     }
9036     break;
9037   }
9038   }
9039
9040   if (Result.getNode()) {
9041     Ops.push_back(Result);
9042     return;
9043   }
9044
9045   // Handle standard constraint letters.
9046   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
9047 }
9048
9049 // isLegalAddressingMode - Return true if the addressing mode represented
9050 // by AM is legal for this target, for a load/store of the specified type.
9051 bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
9052                                               Type *Ty) const {
9053   // FIXME: PPC does not allow r+i addressing modes for vectors!
9054
9055   // PPC allows a sign-extended 16-bit immediate field.
9056   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
9057     return false;
9058
9059   // No global is ever allowed as a base.
9060   if (AM.BaseGV)
9061     return false;
9062
9063   // PPC only support r+r,
9064   switch (AM.Scale) {
9065   case 0:  // "r+i" or just "i", depending on HasBaseReg.
9066     break;
9067   case 1:
9068     if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
9069       return false;
9070     // Otherwise we have r+r or r+i.
9071     break;
9072   case 2:
9073     if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
9074       return false;
9075     // Allow 2*r as r+r.
9076     break;
9077   default:
9078     // No other scales are supported.
9079     return false;
9080   }
9081
9082   return true;
9083 }
9084
9085 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
9086                                            SelectionDAG &DAG) const {
9087   MachineFunction &MF = DAG.getMachineFunction();
9088   MachineFrameInfo *MFI = MF.getFrameInfo();
9089   MFI->setReturnAddressIsTaken(true);
9090
9091   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
9092     return SDValue();
9093
9094   SDLoc dl(Op);
9095   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
9096
9097   // Make sure the function does not optimize away the store of the RA to
9098   // the stack.
9099   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
9100   FuncInfo->setLRStoreRequired();
9101   bool isPPC64 = Subtarget.isPPC64();
9102   bool isDarwinABI = Subtarget.isDarwinABI();
9103
9104   if (Depth > 0) {
9105     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
9106     SDValue Offset =
9107
9108       DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI),
9109                       isPPC64? MVT::i64 : MVT::i32);
9110     return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
9111                        DAG.getNode(ISD::ADD, dl, getPointerTy(),
9112                                    FrameAddr, Offset),
9113                        MachinePointerInfo(), false, false, false, 0);
9114   }
9115
9116   // Just load the return address off the stack.
9117   SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
9118   return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
9119                      RetAddrFI, MachinePointerInfo(), false, false, false, 0);
9120 }
9121
9122 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
9123                                           SelectionDAG &DAG) const {
9124   SDLoc dl(Op);
9125   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
9126
9127   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
9128   bool isPPC64 = PtrVT == MVT::i64;
9129
9130   MachineFunction &MF = DAG.getMachineFunction();
9131   MachineFrameInfo *MFI = MF.getFrameInfo();
9132   MFI->setFrameAddressIsTaken(true);
9133
9134   // Naked functions never have a frame pointer, and so we use r1. For all
9135   // other functions, this decision must be delayed until during PEI.
9136   unsigned FrameReg;
9137   if (MF.getFunction()->getAttributes().hasAttribute(
9138         AttributeSet::FunctionIndex, Attribute::Naked))
9139     FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
9140   else
9141     FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
9142
9143   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
9144                                          PtrVT);
9145   while (Depth--)
9146     FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
9147                             FrameAddr, MachinePointerInfo(), false, false,
9148                             false, 0);
9149   return FrameAddr;
9150 }
9151
9152 // FIXME? Maybe this could be a TableGen attribute on some registers and
9153 // this table could be generated automatically from RegInfo.
9154 unsigned PPCTargetLowering::getRegisterByName(const char* RegName,
9155                                               EVT VT) const {
9156   bool isPPC64 = Subtarget.isPPC64();
9157   bool isDarwinABI = Subtarget.isDarwinABI();
9158
9159   if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
9160       (!isPPC64 && VT != MVT::i32))
9161     report_fatal_error("Invalid register global variable type");
9162
9163   bool is64Bit = isPPC64 && VT == MVT::i64;
9164   unsigned Reg = StringSwitch<unsigned>(RegName)
9165                    .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
9166                    .Case("r2", isDarwinABI ? 0 : (is64Bit ? PPC::X2 : PPC::R2))
9167                    .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
9168                                   (is64Bit ? PPC::X13 : PPC::R13))
9169                    .Default(0);
9170
9171   if (Reg)
9172     return Reg;
9173   report_fatal_error("Invalid register name global variable");
9174 }
9175
9176 bool
9177 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
9178   // The PowerPC target isn't yet aware of offsets.
9179   return false;
9180 }
9181
9182 /// getOptimalMemOpType - Returns the target specific optimal type for load
9183 /// and store operations as a result of memset, memcpy, and memmove
9184 /// lowering. If DstAlign is zero that means it's safe to destination
9185 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
9186 /// means there isn't a need to check it against alignment requirement,
9187 /// probably because the source does not need to be loaded. If 'IsMemset' is
9188 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
9189 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
9190 /// source is constant so it does not need to be loaded.
9191 /// It returns EVT::Other if the type should be determined using generic
9192 /// target-independent logic.
9193 EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
9194                                            unsigned DstAlign, unsigned SrcAlign,
9195                                            bool IsMemset, bool ZeroMemset,
9196                                            bool MemcpyStrSrc,
9197                                            MachineFunction &MF) const {
9198   if (Subtarget.isPPC64()) {
9199     return MVT::i64;
9200   } else {
9201     return MVT::i32;
9202   }
9203 }
9204
9205 /// \brief Returns true if it is beneficial to convert a load of a constant
9206 /// to just the constant itself.
9207 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
9208                                                           Type *Ty) const {
9209   assert(Ty->isIntegerTy());
9210
9211   unsigned BitSize = Ty->getPrimitiveSizeInBits();
9212   if (BitSize == 0 || BitSize > 64)
9213     return false;
9214   return true;
9215 }
9216
9217 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
9218   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
9219     return false;
9220   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
9221   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
9222   return NumBits1 == 64 && NumBits2 == 32;
9223 }
9224
9225 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
9226   if (!VT1.isInteger() || !VT2.isInteger())
9227     return false;
9228   unsigned NumBits1 = VT1.getSizeInBits();
9229   unsigned NumBits2 = VT2.getSizeInBits();
9230   return NumBits1 == 64 && NumBits2 == 32;
9231 }
9232
9233 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
9234   return isInt<16>(Imm) || isUInt<16>(Imm);
9235 }
9236
9237 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
9238   return isInt<16>(Imm) || isUInt<16>(Imm);
9239 }
9240
9241 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
9242                                                        unsigned,
9243                                                        unsigned,
9244                                                        bool *Fast) const {
9245   if (DisablePPCUnaligned)
9246     return false;
9247
9248   // PowerPC supports unaligned memory access for simple non-vector types.
9249   // Although accessing unaligned addresses is not as efficient as accessing
9250   // aligned addresses, it is generally more efficient than manual expansion,
9251   // and generally only traps for software emulation when crossing page
9252   // boundaries.
9253
9254   if (!VT.isSimple())
9255     return false;
9256
9257   if (VT.getSimpleVT().isVector()) {
9258     if (Subtarget.hasVSX()) {
9259       if (VT != MVT::v2f64 && VT != MVT::v2i64)
9260         return false;
9261     } else {
9262       return false;
9263     }
9264   }
9265
9266   if (VT == MVT::ppcf128)
9267     return false;
9268
9269   if (Fast)
9270     *Fast = true;
9271
9272   return true;
9273 }
9274
9275 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
9276   VT = VT.getScalarType();
9277
9278   if (!VT.isSimple())
9279     return false;
9280
9281   switch (VT.getSimpleVT().SimpleTy) {
9282   case MVT::f32:
9283   case MVT::f64:
9284     return true;
9285   default:
9286     break;
9287   }
9288
9289   return false;
9290 }
9291
9292 bool
9293 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
9294                      EVT VT , unsigned DefinedValues) const {
9295   if (VT == MVT::v2i64)
9296     return false;
9297
9298   return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
9299 }
9300
9301 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
9302   if (DisableILPPref || Subtarget.enableMachineScheduler())
9303     return TargetLowering::getSchedulingPreference(N);
9304
9305   return Sched::ILP;
9306 }
9307
9308 // Create a fast isel object.
9309 FastISel *
9310 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
9311                                   const TargetLibraryInfo *LibInfo) const {
9312   return PPC::createFastISel(FuncInfo, LibInfo);
9313 }