//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCCallingConv.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

// FIXME: Remove this once soft-float is supported.
static cl::opt<bool> DisablePPCFloatInVariadic("disable-ppc-float-in-variadic",
    cl::desc("disable saving float registers for va_start on PPC"), cl::Hidden);

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
    cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
    cl::desc("disable setting the node scheduling preference to ILP on PPC"),
    cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
    cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
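
  // Illustrative note: with these actions marked Legal, a memory access whose
  // address is also used to update the pointer (e.g. "val = *++p") can be
  // selected as a single update-form instruction such as lwzu or stwu.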

  if (Subtarget.useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::UINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load/store of condition registers.
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // This is used in the ppcf128->int sequence. Note it has different semantics
  // from FP_ROUND: that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions.
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow.
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FMA  , MVT::f64, Legal);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);
  setOperationAction(ISD::FMA  , MVT::f32, Legal);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // If we're enabling GP optimizations, use hardware square root.
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // PowerPC does not have BSWAP, CTPOP or CTTZ.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);

  if (Subtarget.hasPOPCNTD()) {
    setOperationAction(ISD::CTPOP, MVT::i32, Legal);
    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  }

  // PowerPC does not have ROTR.
  setOperationAction(ISD::ROTR, MVT::i32, Expand);
  setOperationAction(ISD::ROTR, MVT::i64, Expand);

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select.
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit.
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND, which requires SetCC.
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP.
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i64, Expand);
  setOperationAction(ISD::BITCAST, MVT::f64, Expand);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling; it is a light-weight setjmp/longjmp replacement
  // used to support continuations, user-level threading, etc. As a result, no
  // other SjLj exception interfaces are implemented; please don't build your
  // own exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  if (Subtarget.isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  if (Subtarget.isSVR4ABI() && !isPPC64)
    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (Subtarget.hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      // This check is temporary until support for quadword add/sub is added.
      if (VT.SimpleTy != MVT::v1i128) {
        setOperationAction(ISD::ADD, VT, Legal);
        setOperationAction(ISD::SUB, VT, Legal);
      } else {
        setOperationAction(ISD::ADD, VT, Expand);
        setOperationAction(ISD::SUB, VT, Expand);
      }

      // Vector instructions introduced in P8.
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      } else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , VT, Promote);
      AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
      setOperationAction(ISD::OR    , VT, Promote);
      AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , VT, Promote);
      AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , VT, Promote);
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FPOWI, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL, VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT, VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::VSELECT, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

      for (MVT InnerVT : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
    else
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);

    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions.
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
      setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);

      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
        setOperationAction(ISD::SRL, MVT::v2i64, Legal);

        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
      } else {
        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
        setOperationAction(ISD::SRL, MVT::v2i64, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

        // VSX v2i64 only supports non-arithmetic operations.
        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
      }

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);

      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

      // Vector operation legalization checks the result type of
      // SIGN_EXTEND_INREG; overall legalization checks the inner type.
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }
  }

  if (Subtarget.hasQPX()) {
    setOperationAction(ISD::FADD, MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
    setOperationAction(ISD::FREM, MVT::v4f64, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);

    setOperationAction(ISD::LOAD , MVT::v4f64, Custom);
    setOperationAction(ISD::STORE, MVT::v4f64, Custom);

    setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f64, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f64, Expand);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f64, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f64, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f64, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);

    setOperationAction(ISD::FP_TO_SINT, MVT::v4f64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4f64, Expand);

    setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v4f32, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);

    setOperationAction(ISD::FNEG  , MVT::v4f64, Legal);
    setOperationAction(ISD::FABS  , MVT::v4f64, Legal);
    setOperationAction(ISD::FSIN  , MVT::v4f64, Expand);
    setOperationAction(ISD::FCOS  , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOWI , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOW  , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG  , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP  , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);

    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FREM, MVT::v4f32, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);

    setOperationAction(ISD::LOAD , MVT::v4f32, Custom);
    setOperationAction(ISD::STORE, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Expand);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Expand);

    setOperationAction(ISD::FNEG  , MVT::v4f32, Legal);
    setOperationAction(ISD::FABS  , MVT::v4f32, Legal);
    setOperationAction(ISD::FSIN  , MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS  , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOWI , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW  , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG  , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP  , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);

    setOperationAction(ISD::AND, MVT::v4i1, Legal);
    setOperationAction(ISD::OR , MVT::v4i1, Legal);
    setOperationAction(ISD::XOR, MVT::v4i1, Legal);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);

    setOperationAction(ISD::LOAD , MVT::v4i1, Custom);
    setOperationAction(ISD::STORE, MVT::v4i1, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i1, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i1, Expand);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i1, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i1, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);

    setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);

    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);

    setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);

    // These need to set FE_INEXACT, and so cannot be vectorized here.
    setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);

    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    } else {
      setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);

      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    }
  }

  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

  if (!isPPC64) {
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  }

  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }

  if (isPPC64) {
    setStackPointerRegisterToSaveRestore(PPC::X1);
    setExceptionPointerRegister(PPC::X3);
    setExceptionSelectorRegister(PPC::X4);
  } else {
    setStackPointerRegisterToSaveRestore(PPC::R1);
    setExceptionPointerRegister(PPC::R3);
    setExceptionSelectorRegister(PPC::R4);
  }

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  if (Subtarget.hasFPCVT())
    setTargetDAGCombine(ISD::UINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  if (Subtarget.useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_VOID);

  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  if (Subtarget.useCRBits()) {
    setTargetDAGCombine(ISD::TRUNCATE);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget.isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
    setHasMultipleConditionRegisters();
    setJumpIsExpensive();
  }

  setMinFunctionAlignment(2);
  if (Subtarget.isDarwin())
    setPrefFunctionAlignment(4);

  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
    setPrefFunctionAlignment(4);
    setPrefLoopAlignment(4);
    break;
  }

  setInsertFencesForAtomic(true);

  if (Subtarget.enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties(STI.getRegisterInfo());

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
  }
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
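/// For example (illustration only): with MaxMaxAlign == 16 (Altivec), a
/// struct containing a v4i32 member yields MaxAlign == 16, while with
/// MaxMaxAlign == 32 (QPX), a 256-bit vector member yields MaxAlign == 32.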
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      unsigned EltAlign = 0;
      getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
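/// For example, on a PPC64 target with Altivec, a byval struct containing a
/// v4i32 member is aligned to 16 bytes, while a struct of plain scalars keeps
/// the default 8-byte (PPC64) or 4-byte (PPC32) boundary.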
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
  // Darwin passes everything on a 4-byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16-byte and wider vectors are passed on a 16-byte boundary. The rest are
  // aligned to 8 bytes on PPC64 and 4 bytes on PPC32.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);

  return Align;
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((PPCISD::NodeType)Opcode) {
  case PPCISD::FIRST_NUMBER:    break;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
  case PPCISD::FRE:             return "PPCISD::FRE";
  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::CMPB:            return "PPCISD::CMPB";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
  case PPCISD::MFVSR:           return "PPCISD::MFVSR";
  case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
  case PPCISD::ANDIo_1_EQ_BIT:  return "PPCISD::ANDIo_1_EQ_BIT";
  case PPCISD::ANDIo_1_GT_BIT:  return "PPCISD::ANDIo_1_GT_BIT";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
  case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
  case PPCISD::BDZ:             return "PPCISD::BDZ";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
  case PPCISD::SC:              return "PPCISD::SC";
  case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
  case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
  case PPCISD::RFEBB:           return "PPCISD::RFEBB";
  case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
  case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
  case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
  case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
  case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
  case PPCISD::QBFLT:           return "PPCISD::QBFLT";
  case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
  }
  return nullptr;
}

EVT PPCTargetLowering::getSetCCResultType(LLVMContext &C, EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  if (Subtarget.hasQPX())
    return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());

  return VT.changeVectorElementTypeToInteger();
}

bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
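/// For example (big-endian, ShuffleKind 0), vpkuhum keeps the odd-numbered
/// bytes of the two concatenated inputs, so the matching v16i8 mask is
///   <1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31>.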
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),   i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
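/// For example (big-endian, ShuffleKind 0), vpkuwum keeps the low halfword of
/// each word of the two concatenated inputs, so the matching v16i8 mask is
///   <2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31>.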
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
        return false;
  }
  return true;
}

/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
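/// For example (big-endian, ShuffleKind 0), vpkudum keeps the low word of
/// each doubleword of the two concatenated inputs, so the matching v16i8
/// mask is <4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31>.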
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  const PPCSubtarget& Subtarget =
      static_cast<const PPCSubtarget&>(DAG.getSubtarget());
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getTarget().getDataLayout()->isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
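/// For example (big-endian, ShuffleKind 0, UnitSize 4), this matches the
/// vmrglw pattern isVMerge(N, 4, 8, 24), i.e. the v16i8 mask
///   <8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31>.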
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
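/// For example (big-endian, ShuffleKind 0, UnitSize 4), this matches the
/// vmrghw pattern isVMerge(N, 4, 0, 16), i.e. the v16i8 mask
///   <0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23>.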
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}

/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2). For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
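/// For example (big-endian, ShuffleKind 0), the v16i8 mask
///   <3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18>
/// selects the two concatenated inputs shifted left by three bytes, so this
/// returns 3.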
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1; // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  bool isLE = DAG.getTarget().getDataLayout()->isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  if (ShuffleKind == 2 && isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
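/// For example, with EltSize == 4, a splat of word element 2 corresponds to
/// the v16i8 mask <8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11>.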
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte
  // element splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
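/// For example, with EltSize == 4 and a splat of word element 2, this returns
/// 2 on a big-endian target and (16/4 - 1) - 2 == 1 on a little-endian one.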
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
                                SelectionDAG &DAG) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  if (DAG.getTarget().getDataLayout()->isLittleEndian())
    return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
  else
    return SVOp->getMaskElt(0) / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted. The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
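/// For example, a v8i16 build_vector of eight 1s with ByteSize == 2 can be
/// materialized with "vspltish 1", so this returns the constant 1.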
1383 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
1384 SDValue OpVal(nullptr, 0);
1386 // If ByteSize of the splat is bigger than the element size of the
1387 // build_vector, then we have a case where we are checking for a splat where
1388 // multiple elements of the buildvector are folded together into a single
1389 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
1390 unsigned EltSize = 16/N->getNumOperands();
1391 if (EltSize < ByteSize) {
1392 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
1393 SDValue UniquedVals[4];
1394 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
1396 // See if all of the elements in the buildvector agree across.
1397 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1398 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1399 // If the element isn't a constant, bail fully out.
1400 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
1403 if (!UniquedVals[i&(Multiple-1)].getNode())
1404 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
1405 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
1406 return SDValue(); // no match.
1409 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
1410 // either constant or undef values that are identical for each chunk. See
1411 // if these chunks can form into a larger vspltis*.
1413 // Check to see if all of the leading entries are either 0 or -1. If
1414 // neither, then this won't fit into the immediate field.
1415 bool LeadingZero = true;
1416 bool LeadingOnes = true;
1417 for (unsigned i = 0; i != Multiple-1; ++i) {
1418 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
1420 LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
1421 LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
1422 }
1423 // Finally, check the least significant entry.
1424 if (LeadingZero) {
1425 if (!UniquedVals[Multiple-1].getNode())
1426 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
1427 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
1428 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
1429 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
1430 }
1431 if (LeadingOnes) {
1432 if (!UniquedVals[Multiple-1].getNode())
1433 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
1434 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
1435 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
1436 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
1437 }
1439 return SDValue();
1440 }
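// Example: a v8i16 build_vector replicating the halfword pair {0, 5},
// checked with ByteSize == 4, gives Multiple == 2 and UniquedVals == {0, 5};
// the leading entry is zero and 5 < 16, so it is matched as vspltisw(5),
// which materializes exactly those halfword pairs on a big-endian target.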
1442 // Check to see if this buildvec has a single non-undef value in its elements.
1443 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1444 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1445 if (!OpVal.getNode())
1446 OpVal = N->getOperand(i);
1447 else if (OpVal != N->getOperand(i))
1448 return SDValue();
1449 }
1451 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
1453 unsigned ValSizeInBytes = EltSize;
1454 uint64_t Value = 0;
1455 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1456 Value = CN->getZExtValue();
1457 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1458 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
1459 Value = FloatToBits(CN->getValueAPF().convertToFloat());
1460 }
1462 // If the splat value is larger than the element value, then we can never do
1463 // this splat. The only case that we could fit the replicated bits into our
1464 // immediate field for would be zero, and we prefer to use vxor for it.
1465 if (ValSizeInBytes < ByteSize) return SDValue();
1467 // If the element value is larger than the splat value, check if it consists
1468 // of a repeated bit pattern of size ByteSize.
1469 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
1470 return SDValue();
1472 // Properly sign extend the value.
1473 int MaskVal = SignExtend32(Value, ByteSize * 8);
1475 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
1476 if (MaskVal == 0) return SDValue();
1478 // Finally, if this value fits in a 5 bit sext field, return it
1479 if (SignExtend32<5>(MaskVal) == MaskVal)
1480 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
1481 return SDValue();
1482 }
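// The vspltis[bhw] immediate is a 5-bit signed field, so only splat values
// in [-16, 15] are returned; e.g. MaskVal == -2 is accepted (vspltisw(-2)
// fills each word with 0xFFFFFFFE) while MaskVal == 20 is rejected.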
1484 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
1485 /// amount, otherwise return -1.
1486 int PPC::isQVALIGNIShuffleMask(SDNode *N) {
1487 EVT VT = N->getValueType(0);
1488 if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
1489 return -1;
1491 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1493 // Find the first non-undef value in the shuffle mask.
1494 unsigned i;
1495 for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
1496 /*search*/;
1498 if (i == 4) return -1; // all undef.
1500 // Otherwise, check to see if the rest of the elements are consecutively
1501 // numbered from this value.
1502 unsigned ShiftAmt = SVOp->getMaskElt(i);
1503 if (ShiftAmt < i) return -1;
1504 ShiftAmt -= i;
1506 // Check the rest of the elements to see if they are consecutive.
1507 for (++i; i != 4; ++i)
1508 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1509 return -1;
1511 return ShiftAmt;
1512 }
1514 //===----------------------------------------------------------------------===//
1515 // Addressing Mode Selection
1516 //===----------------------------------------------------------------------===//
1518 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
1519 /// or 64-bit immediate, and if the value can be accurately represented as a
1520 /// sign extension from a 16-bit value. If so, this returns true and the
1521 /// immediate.
1522 static bool isIntS16Immediate(SDNode *N, short &Imm) {
1523 if (!isa<ConstantSDNode>(N))
1524 return false;
1526 Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
1527 if (N->getValueType(0) == MVT::i32)
1528 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
1529 else
1530 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
1531 }
1532 static bool isIntS16Immediate(SDValue Op, short &Imm) {
1533 return isIntS16Immediate(Op.getNode(), Imm);
1534 }
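// e.g. 32767 and -32768 are accepted because they survive the round trip
// through the short, while 32768 is not: it truncates to -32768, which no
// longer compares equal to the original value.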
1537 /// SelectAddressRegReg - Given the specified address, check to see if it
1538 /// can be represented as an indexed [r+r] operation. Returns false if it
1539 /// can be more efficiently represented with [r+imm].
1540 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
1541 SDValue &Index,
1542 SelectionDAG &DAG) const {
1543 short imm = 0;
1544 if (N.getOpcode() == ISD::ADD) {
1545 if (isIntS16Immediate(N.getOperand(1), imm))
1546 return false; // r+i
1547 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
1548 return false; // r+i
1550 Base = N.getOperand(0);
1551 Index = N.getOperand(1);
1552 return true;
1553 } else if (N.getOpcode() == ISD::OR) {
1554 if (isIntS16Immediate(N.getOperand(1), imm))
1555 return false; // the displacement form can handle this as r+i.
1557 // If this is an or of disjoint bitfields, we can codegen this as an add
1558 // (for better address arithmetic) if the LHS and RHS of the OR are provably
1559 // disjoint.
1560 APInt LHSKnownZero, LHSKnownOne;
1561 APInt RHSKnownZero, RHSKnownOne;
1562 DAG.computeKnownBits(N.getOperand(0),
1563 LHSKnownZero, LHSKnownOne);
1565 if (LHSKnownZero.getBoolValue()) {
1566 DAG.computeKnownBits(N.getOperand(1),
1567 RHSKnownZero, RHSKnownOne);
1568 // If all of the bits are known zero on the LHS or RHS, the add won't
1569 // carry.
1570 if (~(LHSKnownZero | RHSKnownZero) == 0) {
1571 Base = N.getOperand(0);
1572 Index = N.getOperand(1);
1573 return true;
1574 }
1575 }
1576 }
1578 return false;
1579 }
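// Example: in (or (shl %x, 4), (and %y, 15)) the low four bits of the LHS
// and all higher bits of the RHS are known zero, so every bit is provably
// zero on at least one side; the or cannot carry and is safely selected as
// the [r+r] add of the two operands.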
1581 // If we happen to be doing an i64 load or store into a stack slot that has
1582 // less than a 4-byte alignment, then the frame-index elimination may need to
1583 // use an indexed load or store instruction (because the offset may not be a
1584 // multiple of 4). The extra register needed to hold the offset comes from the
1585 // register scavenger, and it is possible that the scavenger will need to use
1586 // an emergency spill slot. As a result, we need to make sure that a spill slot
1587 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
1588 // stack slot.
1589 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
1590 // FIXME: This does not handle the LWA case.
1591 if (VT != MVT::i64)
1592 return;
1594 // NOTE: We'll exclude negative FIs here, which come from argument
1595 // lowering, because there are no known test cases triggering this problem
1596 // using packed structures (or similar). We can remove this exclusion if
1597 // we find such a test case. The reason why this is so test-case driven is
1598 // because this entire 'fixup' is only to prevent crashes (from the
1599 // register scavenger) on not-really-valid inputs. For example, if we have:
1600 // %a = alloca i1
1601 // %b = bitcast i1* %a to i64*
1602 // store i64 -1, i64* %b
1603 // then the store should really be marked as 'align 1', but is not. If it
1604 // were marked as 'align 1' then the indexed form would have been
1605 // instruction-selected initially, and the problem this 'fixup' is preventing
1606 // won't happen regardless.
1607 if (FrameIdx < 0)
1608 return;
1610 MachineFunction &MF = DAG.getMachineFunction();
1611 MachineFrameInfo *MFI = MF.getFrameInfo();
1613 unsigned Align = MFI->getObjectAlignment(FrameIdx);
1614 if (Align >= 4)
1615 return;
1617 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1618 FuncInfo->setHasNonRISpills();
1619 }
1621 /// Returns true if the address N can be represented by a base register plus
1622 /// a signed 16-bit displacement [r+imm], and if it is not better
1623 /// represented as reg+reg. If Aligned is true, only accept displacements
1624 /// suitable for STD and friends, i.e. multiples of 4.
1625 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
1626 SDValue &Base,
1627 SelectionDAG &DAG,
1628 bool Aligned) const {
1629 // FIXME dl should come from parent load or store, not from address
1630 SDLoc dl(N);
1631 // If this can be more profitably realized as r+r, fail.
1632 if (SelectAddressRegReg(N, Disp, Base, DAG))
1633 return false;
1635 if (N.getOpcode() == ISD::ADD) {
1636 short imm = 0;
1637 if (isIntS16Immediate(N.getOperand(1), imm) &&
1638 (!Aligned || (imm & 3) == 0)) {
1639 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
1640 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
1641 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1642 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
1643 } else {
1644 Base = N.getOperand(0);
1645 }
1646 return true; // [r+i]
1647 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
1648 // Match LOAD (ADD (X, Lo(G))).
1649 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
1650 && "Cannot handle constant offsets yet!");
1651 Disp = N.getOperand(1).getOperand(0); // The global address.
1652 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
1653 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
1654 Disp.getOpcode() == ISD::TargetConstantPool ||
1655 Disp.getOpcode() == ISD::TargetJumpTable);
1656 Base = N.getOperand(0);
1657 return true; // [&g+r]
1658 }
1659 } else if (N.getOpcode() == ISD::OR) {
1660 short imm = 0;
1661 if (isIntS16Immediate(N.getOperand(1), imm) &&
1662 (!Aligned || (imm & 3) == 0)) {
1663 // If this is an or of disjoint bitfields, we can codegen this as an add
1664 // (for better address arithmetic) if the LHS and RHS of the OR are
1665 // provably disjoint.
1666 APInt LHSKnownZero, LHSKnownOne;
1667 DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
1669 if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
1670 // If all of the bits are known zero on the LHS or RHS, the add won't
1671 // carry.
1672 if (FrameIndexSDNode *FI =
1673 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
1674 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1675 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
1676 } else {
1677 Base = N.getOperand(0);
1678 }
1679 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
1680 return true;
1681 }
1682 }
1683 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
1684 // Loading from a constant address.
1686 // If this address fits entirely in a 16-bit sext immediate field, codegen
1687 // this as "d, 0".
1688 short Imm;
1689 if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
1690 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
1691 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
1692 CN->getValueType(0));
1693 return true;
1694 }
1696 // Handle 32-bit sext immediates with LIS + addr mode.
1697 if ((CN->getValueType(0) == MVT::i32 ||
1698 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
1699 (!Aligned || (CN->getZExtValue() & 3) == 0)) {
1700 int Addr = (int)CN->getZExtValue();
1702 // Otherwise, break this down into an LIS + disp.
1703 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
1705 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
1706 MVT::i32);
1707 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
1708 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
1709 return true;
1710 }
1711 }
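// Worked example: Addr = 0x12348000 gives Disp = (short)0x8000 = -32768 and
// Base = (0x12348000 + 32768) >> 16 = 0x1235; "lis 0x1235" materializes
// 0x12350000, and 0x12350000 + (-32768) recovers 0x12348000.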
1713 Disp = DAG.getTargetConstant(0, dl, getPointerTy());
1714 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
1715 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1716 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
1717 } else
1718 Base = N;
1719 return true; // [r+0]
1720 }
1722 /// SelectAddressRegRegOnly - Given the specified address, force it to be
1723 /// represented as an indexed [r+r] operation.
1724 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
1725 SDValue &Index,
1726 SelectionDAG &DAG) const {
1727 // Check to see if we can easily represent this as an [r+r] address. This
1728 // will fail if it thinks that the address is more profitably represented as
1729 // reg+imm, e.g. where imm = 0.
1730 if (SelectAddressRegReg(N, Base, Index, DAG))
1731 return true;
1733 // If the operand is an addition, always emit this as [r+r], since this is
1734 // better (for code size, and execution, as the memop does the add for free)
1735 // than emitting an explicit add.
1736 if (N.getOpcode() == ISD::ADD) {
1737 Base = N.getOperand(0);
1738 Index = N.getOperand(1);
1739 return true;
1740 }
1742 // Otherwise, do it the hard way, using R0 as the base register.
1743 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
1744 N.getValueType());
1745 Index = N;
1746 return true;
1747 }
1749 /// getPreIndexedAddressParts - returns true by value, base pointer and
1750 /// offset pointer and addressing mode by reference if the node's address
1751 /// can be legally represented as pre-indexed load / store address.
1752 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
1753 SDValue &Offset,
1754 ISD::MemIndexedMode &AM,
1755 SelectionDAG &DAG) const {
1756 if (DisablePPCPreinc) return false;
1758 bool isLoad = true;
1759 SDValue Ptr;
1760 EVT VT;
1761 unsigned Alignment;
1762 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1763 Ptr = LD->getBasePtr();
1764 VT = LD->getMemoryVT();
1765 Alignment = LD->getAlignment();
1766 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
1767 Ptr = ST->getBasePtr();
1768 VT = ST->getMemoryVT();
1769 Alignment = ST->getAlignment();
1770 isLoad = false;
1771 } else
1772 return false;
1774 // PowerPC doesn't have preinc load/store instructions for vectors (except
1775 // for QPX, which does have preinc r+r forms).
1776 if (VT.isVector()) {
1777 if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
1778 return false;
1779 } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
1780 AM = ISD::PRE_INC;
1781 return true;
1782 }
1783 }
1785 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
1787 // Common code will reject creating a pre-inc form if the base pointer
1788 // is a frame index, or if N is a store and the base pointer is either
1789 // the same as or a predecessor of the value being stored. Check for
1790 // those situations here, and try with swapped Base/Offset instead.
1791 bool Swap = false;
1793 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
1794 Swap = true;
1795 else if (!isLoad) {
1796 SDValue Val = cast<StoreSDNode>(N)->getValue();
1797 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
1798 Swap = true;
1799 }
1801 if (Swap)
1802 std::swap(Base, Offset);
1804 AM = ISD::PRE_INC;
1805 return true;
1806 }
1808 // LDU/STU can only handle immediates that are a multiple of 4.
1809 if (VT != MVT::i64) {
1810 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
1811 return false;
1812 } else {
1813 // LDU/STU need an address with at least 4-byte alignment.
1814 if (Alignment < 4)
1815 return false;
1817 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
1818 return false;
1819 }
1821 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1822 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
1823 // sext i32 to i64 when addr mode is r+i.
1824 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
1825 LD->getExtensionType() == ISD::SEXTLOAD &&
1826 isa<ConstantSDNode>(Offset))
1827 return false;
1828 }
1830 AM = ISD::PRE_INC;
1831 return true;
1832 }
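// Example: a loop walking an i32 array can fold its pointer increment into
// the access itself, e.g. "lwzu r5, 4(r3)", which loads from r3+4 and
// writes the incremented address back into r3; that is what the PRE_INC
// form selected above becomes.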
1834 //===----------------------------------------------------------------------===//
1835 // LowerOperation implementation
1836 //===----------------------------------------------------------------------===//
1838 /// GetLabelAccessInfo - Return true if we should reference labels using a
1839 /// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
1840 static bool GetLabelAccessInfo(const TargetMachine &TM,
1841 const PPCSubtarget &Subtarget,
1842 unsigned &HiOpFlags, unsigned &LoOpFlags,
1843 const GlobalValue *GV = nullptr) {
1844 HiOpFlags = PPCII::MO_HA;
1845 LoOpFlags = PPCII::MO_LO;
1847 // Don't use the pic base if not in PIC relocation model.
1848 bool isPIC = TM.getRelocationModel() == Reloc::PIC_;
1850 if (isPIC) {
1851 HiOpFlags |= PPCII::MO_PIC_FLAG;
1852 LoOpFlags |= PPCII::MO_PIC_FLAG;
1853 }
1855 // If this is a reference to a global value that requires a non-lazy-ptr, make
1856 // sure that instruction lowering adds it.
1857 if (GV && Subtarget.hasLazyResolverStub(GV)) {
1858 HiOpFlags |= PPCII::MO_NLP_FLAG;
1859 LoOpFlags |= PPCII::MO_NLP_FLAG;
1861 if (GV->hasHiddenVisibility()) {
1862 HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
1863 LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
1864 }
1865 }
1867 return isPIC;
1868 }
1870 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
1871 SelectionDAG &DAG) {
1872 SDLoc DL(HiPart);
1873 EVT PtrVT = HiPart.getValueType();
1874 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
1876 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
1877 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
1879 // With PIC, the first instruction is actually "GR+hi(&G)".
1880 if (isPIC)
1881 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
1882 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
1884 // Generate non-pic code that has direct accesses to the constant pool.
1885 // The address of the global is just (hi(&g)+lo(&g)).
1886 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
1887 }
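// For a global g, the non-PIC form of this lowering is the familiar pair
// "lis rD, g@ha" followed by "addi rD, rD, g@l", where @ha is the high
// half adjusted to compensate for the sign extension of the low 16 bits.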
1889 static void setUsesTOCBasePtr(MachineFunction &MF) {
1890 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1891 FuncInfo->setUsesTOCBasePtr();
1894 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
1895 setUsesTOCBasePtr(DAG.getMachineFunction());
1898 static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit,
1899 SDValue GA) {
1900 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
1901 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
1902 DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
1904 SDValue Ops[] = { GA, Reg };
1905 return DAG.getMemIntrinsicNode(PPCISD::TOC_ENTRY, dl,
1906 DAG.getVTList(VT, MVT::Other), Ops, VT,
1907 MachinePointerInfo::getGOT(), 0, false, true,
1908 false, 0);
1909 }
1911 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
1912 SelectionDAG &DAG) const {
1913 EVT PtrVT = Op.getValueType();
1914 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1915 const Constant *C = CP->getConstVal();
1917 // 64-bit SVR4 ABI code is always position-independent.
1918 // The actual address of the GlobalValue is stored in the TOC.
1919 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
1920 setUsesTOCBasePtr(DAG);
1921 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
1922 return getTOCEntry(DAG, SDLoc(CP), true, GA);
1923 }
1925 unsigned MOHiFlag, MOLoFlag;
1926 bool isPIC =
1927 GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
1929 if (isPIC && Subtarget.isSVR4ABI()) {
1930 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
1931 PPCII::MO_PIC_FLAG);
1932 return getTOCEntry(DAG, SDLoc(CP), false, GA);
1933 }
1935 SDValue CPIHi =
1936 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
1937 SDValue CPILo =
1938 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
1939 return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
1940 }
1942 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1943 EVT PtrVT = Op.getValueType();
1944 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
1946 // 64-bit SVR4 ABI code is always position-independent.
1947 // The actual address of the GlobalValue is stored in the TOC.
1948 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
1949 setUsesTOCBasePtr(DAG);
1950 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
1951 return getTOCEntry(DAG, SDLoc(JT), true, GA);
1952 }
1954 unsigned MOHiFlag, MOLoFlag;
1955 bool isPIC =
1956 GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
1958 if (isPIC && Subtarget.isSVR4ABI()) {
1959 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
1960 PPCII::MO_PIC_FLAG);
1961 return getTOCEntry(DAG, SDLoc(GA), false, GA);
1962 }
1964 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
1965 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
1966 return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
1967 }
1969 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
1970 SelectionDAG &DAG) const {
1971 EVT PtrVT = Op.getValueType();
1972 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
1973 const BlockAddress *BA = BASDN->getBlockAddress();
1975 // 64-bit SVR4 ABI code is always position-independent.
1976 // The actual BlockAddress is stored in the TOC.
1977 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
1978 setUsesTOCBasePtr(DAG);
1979 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
1980 return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
1981 }
1983 unsigned MOHiFlag, MOLoFlag;
1984 bool isPIC =
1985 GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
1986 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
1987 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
1988 return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
1989 }
1991 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1992 SelectionDAG &DAG) const {
1994 // FIXME: TLS addresses currently use medium model code sequences,
1995 // which is the most useful form. Eventually support for small and
1996 // large models could be added if users need it, at the cost of
1997 // additional complexity.
1998 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
1999 SDLoc dl(GA);
2000 const GlobalValue *GV = GA->getGlobal();
2001 EVT PtrVT = getPointerTy();
2002 bool is64bit = Subtarget.isPPC64();
2003 const Module *M = DAG.getMachineFunction().getFunction()->getParent();
2004 PICLevel::Level picLevel = M->getPICLevel();
2006 TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
2008 if (Model == TLSModel::LocalExec) {
2009 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2010 PPCII::MO_TPREL_HA);
2011 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2012 PPCII::MO_TPREL_LO);
2013 SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
2014 is64bit ? MVT::i64 : MVT::i32);
2015 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
2016 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
2017 }
2019 if (Model == TLSModel::InitialExec) {
2020 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2021 SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2022 PPCII::MO_TLS);
2023 SDValue GOTPtr;
2024 if (is64bit) {
2025 setUsesTOCBasePtr(DAG);
2026 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2027 GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
2028 PtrVT, GOTReg, TGA);
2029 } else
2030 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
2031 SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
2032 PtrVT, TGA, GOTPtr);
2033 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
2034 }
2036 if (Model == TLSModel::GeneralDynamic) {
2037 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2038 SDValue GOTPtr;
2039 if (is64bit) {
2040 setUsesTOCBasePtr(DAG);
2041 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2042 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
2043 GOTReg, TGA);
2044 } else {
2045 if (picLevel == PICLevel::Small)
2046 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2047 else
2048 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2049 }
2050 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
2051 GOTPtr, TGA, TGA);
2052 }
2054 if (Model == TLSModel::LocalDynamic) {
2055 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2056 SDValue GOTPtr;
2057 if (is64bit) {
2058 setUsesTOCBasePtr(DAG);
2059 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2060 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
2061 GOTReg, TGA);
2062 } else {
2063 if (picLevel == PICLevel::Small)
2064 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2065 else
2066 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2067 }
2068 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
2069 PtrVT, GOTPtr, TGA, TGA);
2070 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
2071 PtrVT, TLSAddr, TGA);
2072 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
2073 }
2075 llvm_unreachable("Unknown TLS model!");
2076 }
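// For reference, the 64-bit local-exec case above lowers to the pair
// "addis rD, r13, v@tprel@ha" followed by "addi rD, rD, v@tprel@l",
// with r13 holding the thread pointer.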
2078 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
2079 SelectionDAG &DAG) const {
2080 EVT PtrVT = Op.getValueType();
2081 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
2082 SDLoc DL(GSDN);
2083 const GlobalValue *GV = GSDN->getGlobal();
2085 // 64-bit SVR4 ABI code is always position-independent.
2086 // The actual address of the GlobalValue is stored in the TOC.
2087 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2088 setUsesTOCBasePtr(DAG);
2089 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
2090 return getTOCEntry(DAG, DL, true, GA);
2091 }
2093 unsigned MOHiFlag, MOLoFlag;
2094 bool isPIC =
2095 GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag, GV);
2097 if (isPIC && Subtarget.isSVR4ABI()) {
2098 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
2099 GSDN->getOffset(),
2100 PPCII::MO_PIC_FLAG);
2101 return getTOCEntry(DAG, DL, false, GA);
2102 }
2104 SDValue GAHi =
2105 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
2106 SDValue GALo =
2107 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
2109 SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);
2111 // If the global reference is actually to a non-lazy-pointer, we have to do an
2112 // extra load to get the address of the global.
2113 if (MOHiFlag & PPCII::MO_NLP_FLAG)
2114 Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
2115 false, false, false, 0);
2117 return Ptr;
2118 }
2119 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
2120 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2121 SDLoc dl(Op);
2123 if (Op.getValueType() == MVT::v2i64) {
2124 // When the operands themselves are v2i64 values, we need to do something
2125 // special because VSX has no underlying comparison operations for these.
2126 if (Op.getOperand(0).getValueType() == MVT::v2i64) {
2127 // Equality can be handled by casting to the legal type for Altivec
2128 // comparisons, everything else needs to be expanded.
2129 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2130 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
2131 DAG.getSetCC(dl, MVT::v4i32,
2132 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
2133 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
2134 CC));
2135 }
2137 return SDValue();
2138 }
2140 // We handle most of these in the usual way.
2141 return Op;
2142 }
2144 // If we're comparing for equality to zero, expose the fact that this is
2145 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
2146 // fold the new nodes.
2147 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2148 if (C->isNullValue() && CC == ISD::SETEQ) {
2149 EVT VT = Op.getOperand(0).getValueType();
2150 SDValue Zext = Op.getOperand(0);
2151 if (VT.bitsLT(MVT::i32)) {
2152 VT = MVT::i32;
2153 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
2154 }
2155 unsigned Log2b = Log2_32(VT.getSizeInBits());
2156 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
2157 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
2158 DAG.getConstant(Log2b, dl, MVT::i32));
2159 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
2160 }
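// The trick: for a 32-bit value x, ctlz(x) is 32 exactly when x == 0, so
// (x == 0) is computed as ctlz(x) >> 5 with no condition-register round
// trip (Log2_32(32) == 5).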
2161 // Leave comparisons against 0 and -1 alone for now, since they're usually
2162 // optimized. FIXME: revisit this when we can custom lower all setcc
2163 // optimizations.
2164 if (C->isAllOnesValue() || C->isNullValue())
2165 return SDValue();
2166 }
2168 // If we have an integer seteq/setne, turn it into a compare against zero
2169 // by xor'ing the rhs with the lhs, which is faster than setting a
2170 // condition register, reading it back out, and masking the correct bit. The
2171 // normal approach here uses sub to do this instead of xor. Using xor exposes
2172 // the result to other bit-twiddling opportunities.
2173 EVT LHSVT = Op.getOperand(0).getValueType();
2174 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2175 EVT VT = Op.getValueType();
2176 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
2177 Op.getOperand(1));
2178 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
2179 }
2181 return SDValue();
2182 }
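// e.g. (seteq a, b) becomes (seteq (xor a, b), 0); if a and b also feed
// other bit-twiddling logic, the xor can combine with it, which a sub
// usually cannot.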
2183 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
2184 const PPCSubtarget &Subtarget) const {
2185 SDNode *Node = Op.getNode();
2186 EVT VT = Node->getValueType(0);
2187 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2188 SDValue InChain = Node->getOperand(0);
2189 SDValue VAListPtr = Node->getOperand(1);
2190 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
2192 SDLoc dl(Node);
2193 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
2195 // gpr_index
2196 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2197 VAListPtr, MachinePointerInfo(SV), MVT::i8,
2198 false, false, false, 0);
2199 InChain = GprIndex.getValue(1);
2201 if (VT == MVT::i64) {
2202 // Check if GprIndex is even
2203 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
2204 DAG.getConstant(1, dl, MVT::i32));
2205 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
2206 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
2207 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
2208 DAG.getConstant(1, dl, MVT::i32));
2209 // Align GprIndex to be even if it isn't
2210 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
2211 GprIndex);
2212 }
2214 // fpr index is 1 byte after gpr
2215 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2216 DAG.getConstant(1, dl, MVT::i32));
2218 // fpr
2219 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2220 FprPtr, MachinePointerInfo(SV), MVT::i8,
2221 false, false, false, 0);
2222 InChain = FprIndex.getValue(1);
2224 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2225 DAG.getConstant(8, dl, MVT::i32));
2227 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2228 DAG.getConstant(4, dl, MVT::i32));
2231 SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
2232 MachinePointerInfo(), false, false,
2233 false, 0);
2234 InChain = OverflowArea.getValue(1);
2236 SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
2237 MachinePointerInfo(), false, false,
2238 false, 0);
2239 InChain = RegSaveArea.getValue(1);
2241 // select overflow_area if index > 8
2242 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
2243 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
2245 // adjustment constant gpr_index * 4/8
2246 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
2247 VT.isInteger() ? GprIndex : FprIndex,
2248 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
2249 MVT::i32));
2251 // OurReg = RegSaveArea + RegConstant
2252 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
2253 RegConstant);
2255 // Floating types are 32 bytes into RegSaveArea
2256 if (VT.isFloatingPoint())
2257 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
2258 DAG.getConstant(32, dl, MVT::i32));
2260 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
2261 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
2262 VT.isInteger() ? GprIndex : FprIndex,
2263 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
2264 MVT::i32));
2266 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
2267 VT.isInteger() ? VAListPtr : FprPtr,
2268 MachinePointerInfo(SV),
2269 MVT::i8, false, false, 0);
2271 // determine if we should load from reg_save_area or overflow_area
2272 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
2274 // increase overflow_area by 4/8 if gpr/fpr > 8
2275 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
2276 DAG.getConstant(VT.isInteger() ? 4 : 8,
2277 dl, MVT::i32));
2279 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
2280 OverflowAreaPlusN);
2282 InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
2283 OverflowAreaPtr,
2284 MachinePointerInfo(),
2285 MVT::i32, false, false, 0);
2287 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
2288 false, false, false, 0);
2289 }
2291 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
2292 const PPCSubtarget &Subtarget) const {
2293 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
2295 // We have to copy the entire va_list struct:
2296 // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
2297 return DAG.getMemcpy(Op.getOperand(0), Op,
2298 Op.getOperand(1), Op.getOperand(2),
2299 DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
2300 false, MachinePointerInfo(), MachinePointerInfo());
2301 }
2303 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
2304 SelectionDAG &DAG) const {
2305 return Op.getOperand(0);
2306 }
2308 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
2309 SelectionDAG &DAG) const {
2310 SDValue Chain = Op.getOperand(0);
2311 SDValue Trmp = Op.getOperand(1); // trampoline
2312 SDValue FPtr = Op.getOperand(2); // nested function
2313 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
2314 SDLoc dl(Op);
2316 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2317 bool isPPC64 = (PtrVT == MVT::i64);
2318 Type *IntPtrTy =
2319 DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType(
2320 *DAG.getContext());
2322 TargetLowering::ArgListTy Args;
2323 TargetLowering::ArgListEntry Entry;
2325 Entry.Ty = IntPtrTy;
2326 Entry.Node = Trmp; Args.push_back(Entry);
2328 // TrampSize == (isPPC64 ? 48 : 40);
2329 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
2330 isPPC64 ? MVT::i64 : MVT::i32);
2331 Args.push_back(Entry);
2333 Entry.Node = FPtr; Args.push_back(Entry);
2334 Entry.Node = Nest; Args.push_back(Entry);
2336 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
2337 TargetLowering::CallLoweringInfo CLI(DAG);
2338 CLI.setDebugLoc(dl).setChain(Chain)
2339 .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
2340 DAG.getExternalSymbol("__trampoline_setup", PtrVT),
2341 std::move(Args), 0);
2343 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2344 return CallResult.second;
2345 }
2347 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
2348 const PPCSubtarget &Subtarget) const {
2349 MachineFunction &MF = DAG.getMachineFunction();
2350 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2352 SDLoc dl(Op);
2354 if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
2355 // vastart just stores the address of the VarArgsFrameIndex slot into the
2356 // memory location argument.
2357 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2358 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2359 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2360 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
2361 MachinePointerInfo(SV),
2362 false, false, 0);
2363 }
2365 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
2366 // We suppose the given va_list is already allocated.
2368 // typedef struct {
2369 // char gpr; /* index into the array of 8 GPRs
2370 // * stored in the register save area
2371 // * gpr=0 corresponds to r3,
2372 // * gpr=1 to r4, etc.
2373 // */
2374 // char fpr; /* index into the array of 8 FPRs
2375 // * stored in the register save area
2376 // * fpr=0 corresponds to f1,
2377 // * fpr=1 to f2, etc.
2378 // */
2379 // char *overflow_arg_area;
2380 // /* location on stack that holds
2381 // * the next overflow argument
2382 // */
2383 // char *reg_save_area;
2384 // /* where r3:r10 and f1:f8 (if saved)
2385 // * are stored
2386 // */
2387 // } va_list[1];
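// Example: after "int f(int a, ...)" binds a to r3, va_start leaves
// gpr == 1, so the first va_arg(ap, int) reads the r4 slot of
// reg_save_area and bumps gpr to 2.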
2390 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
2391 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
2394 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2396 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
2397 PtrVT);
2398 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
2399 PtrVT);
2401 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
2402 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
2404 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
2405 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
2407 uint64_t FPROffset = 1;
2408 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
2410 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2412 // Store first byte : number of int regs
2413 SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
2414 Op.getOperand(1),
2415 MachinePointerInfo(SV),
2416 MVT::i8, false, false, 0);
2417 uint64_t nextOffset = FPROffset;
2418 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
2419 ConstFPROffset);
2421 // Store second byte : number of float regs
2422 SDValue secondStore =
2423 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
2424 MachinePointerInfo(SV, nextOffset), MVT::i8,
2425 false, false, 0);
2426 nextOffset += StackOffset;
2427 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
2429 // Store second word : arguments given on stack
2430 SDValue thirdStore =
2431 DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
2432 MachinePointerInfo(SV, nextOffset),
2433 false, false, 0);
2434 nextOffset += FrameOffset;
2435 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
2437 // Store third word : arguments given in registers
2438 return DAG.getStore(thirdStore, dl, FR, nextPtr,
2439 MachinePointerInfo(SV, nextOffset),
2440 false, false, 0);
2441 }
2444 #include "PPCGenCallingConv.inc"
2446 // Function whose sole purpose is to kill compiler warnings
2447 // stemming from unused functions included from PPCGenCallingConv.inc.
2448 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
2449 return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
2450 }
2452 bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
2453 CCValAssign::LocInfo &LocInfo,
2454 ISD::ArgFlagsTy &ArgFlags,
2455 CCState &State) {
2456 return true;
2457 }
2459 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
2461 CCValAssign::LocInfo &LocInfo,
2462 ISD::ArgFlagsTy &ArgFlags,
2463 CCState &State) {
2464 static const MCPhysReg ArgRegs[] = {
2465 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2466 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2467 };
2468 const unsigned NumArgRegs = array_lengthof(ArgRegs);
2470 unsigned RegNum = State.getFirstUnallocated(ArgRegs);
2472 // Skip one register if the first unallocated register has an even register
2473 // number and there are still argument registers available which have not been
2474 // allocated yet. RegNum is actually an index into ArgRegs, which means we
2475 // need to skip a register if RegNum is odd.
2476 if (RegNum != NumArgRegs && RegNum % 2 == 1) {
2477 State.AllocateReg(ArgRegs[RegNum]);
2478 }
2480 // Always return false here, as this function only makes sure that the first
2481 // unallocated register has an odd register number and does not actually
2482 // allocate a register for the current argument.
2483 return false;
2484 }
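// Example: for f(int a, long long b), a lands in r3 and this hook then
// burns r4 so that b occupies the naturally aligned pair r5:r6.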
2486 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
2488 CCValAssign::LocInfo &LocInfo,
2489 ISD::ArgFlagsTy &ArgFlags,
2491 static const MCPhysReg ArgRegs[] = {
2492 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
2493 PPC::F8
2494 };
2496 const unsigned NumArgRegs = array_lengthof(ArgRegs);
2498 unsigned RegNum = State.getFirstUnallocated(ArgRegs);
2500 // If there is only one Floating-point register left we need to put both f64
2501 // values of a split ppc_fp128 value on the stack.
2502 if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
2503 State.AllocateReg(ArgRegs[RegNum]);
2504 }
2506 // Always return false here, as this function only makes sure that the two f64
2507 // values a ppc_fp128 value is split into are both passed in registers or both
2508 // passed on the stack and does not actually allocate a register for the
2509 // current argument.
2510 return false;
2511 }
2513 /// FPR - The set of FP registers that should be allocated for arguments,
2514 /// on Darwin.
2515 static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
2516 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
2517 PPC::F11, PPC::F12, PPC::F13};
2519 /// QFPR - The set of QPX registers that should be allocated for arguments.
2520 static const MCPhysReg QFPR[] = {
2521 PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
2522 PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
2524 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
2525 /// the stack.
2526 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
2527 unsigned PtrByteSize) {
2528 unsigned ArgSize = ArgVT.getStoreSize();
2529 if (Flags.isByVal())
2530 ArgSize = Flags.getByValSize();
2532 // Round up to multiples of the pointer size, except for array members,
2533 // which are always packed.
2534 if (!Flags.isInConsecutiveRegs())
2535 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2537 return ArgSize;
2538 }
2540 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
2541 /// on the stack.
2542 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
2543 ISD::ArgFlagsTy Flags,
2544 unsigned PtrByteSize) {
2545 unsigned Align = PtrByteSize;
2547 // Altivec parameters are padded to a 16 byte boundary.
2548 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
2549 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
2550 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
2551 ArgVT == MVT::v1i128)
2552 Align = 16;
2553 // QPX vector types stored in double-precision are padded to a 32 byte
2554 // boundary.
2555 else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
2556 Align = 32;
2558 // ByVal parameters are aligned as requested.
2559 if (Flags.isByVal()) {
2560 unsigned BVAlign = Flags.getByValAlign();
2561 if (BVAlign > PtrByteSize) {
2562 if (BVAlign % PtrByteSize != 0)
2563 llvm_unreachable(
2564 "ByVal alignment is not a multiple of the pointer size");
2566 Align = BVAlign;
2567 }
2568 }
2570 // Array members are always packed to their original alignment.
2571 if (Flags.isInConsecutiveRegs()) {
2572 // If the array member was split into multiple registers, the first
2573 // needs to be aligned to the size of the full type. (Except for
2574 // ppcf128, which is only aligned as its f64 components.)
2575 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
2576 Align = OrigVT.getStoreSize();
2577 else
2578 Align = ArgVT.getStoreSize();
2579 }
2581 return Align;
2582 }
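// e.g. a v4f32 argument answers 16 here and a byval aggregate declared
// with 32-byte alignment answers 32; callers round the running offset up
// to this boundary before assigning the slot.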
2584 /// CalculateStackSlotUsed - Return whether this argument will use its
2585 /// stack slot (instead of being passed in registers). ArgOffset,
2586 /// AvailableFPRs, and AvailableVRs must hold the current argument
2587 /// position, and will be updated to account for this argument.
2588 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
2589 ISD::ArgFlagsTy Flags,
2590 unsigned PtrByteSize,
2591 unsigned LinkageSize,
2592 unsigned ParamAreaSize,
2593 unsigned &ArgOffset,
2594 unsigned &AvailableFPRs,
2595 unsigned &AvailableVRs, bool HasQPX) {
2596 bool UseMemory = false;
2598 // Respect alignment of argument on the stack.
2600 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
2601 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
2602 // If there's no space left in the argument save area, we must
2603 // use memory (this check also catches zero-sized arguments).
2604 if (ArgOffset >= LinkageSize + ParamAreaSize)
2605 UseMemory = true;
2607 // Allocate argument on the stack.
2608 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
2609 if (Flags.isInConsecutiveRegsLast())
2610 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2611 // If we overran the argument save area, we must use memory
2612 // (this check catches arguments passed partially in memory)
2613 if (ArgOffset > LinkageSize + ParamAreaSize)
2614 UseMemory = true;
2616 // However, if the argument is actually passed in an FPR or a VR,
2617 // we don't use memory after all.
2618 if (!Flags.isByVal()) {
2619 if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
2620 // QPX registers overlap with the scalar FP registers.
2621 (HasQPX && (ArgVT == MVT::v4f32 ||
2622 ArgVT == MVT::v4f64 ||
2623 ArgVT == MVT::v4i1)))
2624 if (AvailableFPRs > 0) {
2625 --AvailableFPRs;
2626 return false;
2627 }
2628 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
2629 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
2630 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
2631 ArgVT == MVT::v1i128)
2632 if (AvailableVRs > 0) {
2633 --AvailableVRs;
2634 return false;
2635 }
2636 }
2638 return UseMemory;
2639 }
2641 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
2642 /// ensure minimum alignment required for target.
2643 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
2644 unsigned NumBytes) {
2645 unsigned TargetAlign = Lowering->getStackAlignment();
2646 unsigned AlignMask = TargetAlign - 1;
2647 NumBytes = (NumBytes + AlignMask) & ~AlignMask;
2648 return NumBytes;
2649 }
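// e.g. with a 16-byte target stack alignment, AlignMask == 15 and a frame
// of 100 bytes rounds up to (100 + 15) & ~15 == 112.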
2651 SDValue
2652 PPCTargetLowering::LowerFormalArguments(SDValue Chain,
2653 CallingConv::ID CallConv, bool isVarArg,
2654 const SmallVectorImpl<ISD::InputArg>
2655 &Ins,
2656 SDLoc dl, SelectionDAG &DAG,
2657 SmallVectorImpl<SDValue> &InVals)
2658 const {
2659 if (Subtarget.isSVR4ABI()) {
2660 if (Subtarget.isPPC64())
2661 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
2662 dl, DAG, InVals);
2663 else
2664 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
2665 dl, DAG, InVals);
2666 } else {
2667 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
2668 dl, DAG, InVals);
2669 }
2670 }
2672 SDValue
2673 PPCTargetLowering::LowerFormalArguments_32SVR4(
2674 SDValue Chain,
2675 CallingConv::ID CallConv, bool isVarArg,
2676 const SmallVectorImpl<ISD::InputArg>
2677 &Ins,
2678 SDLoc dl, SelectionDAG &DAG,
2679 SmallVectorImpl<SDValue> &InVals) const {
2681 // 32-bit SVR4 ABI Stack Frame Layout:
2682 // +-----------------------------------+
2683 // +--> | Back chain |
2684 // | +-----------------------------------+
2685 // | | Floating-point register save area |
2686 // | +-----------------------------------+
2687 // | | General register save area |
2688 // | +-----------------------------------+
2689 // | | CR save word |
2690 // | +-----------------------------------+
2691 // | | VRSAVE save word |
2692 // | +-----------------------------------+
2693 // | | Alignment padding |
2694 // | +-----------------------------------+
2695 // | | Vector register save area |
2696 // | +-----------------------------------+
2697 // | | Local variable space |
2698 // | +-----------------------------------+
2699 // | | Parameter list area |
2700 // | +-----------------------------------+
2701 // | | LR save word |
2702 // | +-----------------------------------+
2703 // SP--> +--- | Back chain |
2704 // +-----------------------------------+
2706 // Specifications:
2707 // System V Application Binary Interface PowerPC Processor Supplement
2708 // AltiVec Technology Programming Interface Manual
2710 MachineFunction &MF = DAG.getMachineFunction();
2711 MachineFrameInfo *MFI = MF.getFrameInfo();
2712 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2714 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2715 // Potential tail calls could cause overwriting of argument stack slots.
2716 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
2717 (CallConv == CallingConv::Fast));
2718 unsigned PtrByteSize = 4;
2720 // Assign locations to all of the incoming arguments.
2721 SmallVector<CCValAssign, 16> ArgLocs;
2722 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2723 *DAG.getContext());
2725 // Reserve space for the linkage area on the stack.
2726 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
2727 CCInfo.AllocateStack(LinkageSize, PtrByteSize);
2729 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
2731 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2732 CCValAssign &VA = ArgLocs[i];
2734 // Arguments stored in registers.
2735 if (VA.isRegLoc()) {
2736 const TargetRegisterClass *RC;
2737 EVT ValVT = VA.getValVT();
2739 switch (ValVT.getSimpleVT().SimpleTy) {
2740 default:
2741 llvm_unreachable("ValVT not supported by formal arguments Lowering");
2742 case MVT::i1:
2743 case MVT::i32:
2744 RC = &PPC::GPRCRegClass;
2745 break;
2746 case MVT::f32:
2747 if (Subtarget.hasP8Vector())
2748 RC = &PPC::VSSRCRegClass;
2749 else
2750 RC = &PPC::F4RCRegClass;
2751 break;
2752 case MVT::f64:
2753 if (Subtarget.hasVSX())
2754 RC = &PPC::VSFRCRegClass;
2755 else
2756 RC = &PPC::F8RCRegClass;
2757 break;
2758 case MVT::v16i8:
2759 case MVT::v8i16:
2760 case MVT::v4i32:
2761 RC = &PPC::VRRCRegClass;
2762 break;
2763 case MVT::v4f32:
2764 RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
2765 break;
2766 case MVT::v2f64:
2767 case MVT::v2i64:
2768 RC = &PPC::VSHRCRegClass;
2769 break;
2770 case MVT::v4f64:
2771 RC = &PPC::QFRCRegClass;
2772 break;
2773 case MVT::v4i1:
2774 RC = &PPC::QBRCRegClass;
2775 break;
2776 }
2778 // Transform the arguments stored in physical registers into virtual ones.
2779 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2780 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
2781 ValVT == MVT::i1 ? MVT::i32 : ValVT);
2783 if (ValVT == MVT::i1)
2784 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
2786 InVals.push_back(ArgValue);
2787 } else {
2788 // Argument stored in memory.
2789 assert(VA.isMemLoc());
2791 unsigned ArgSize = VA.getLocVT().getStoreSize();
2792 int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
2793 isImmutable);
2795 // Create load nodes to retrieve arguments from the stack.
2796 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2797 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
2798 MachinePointerInfo(),
2799 false, false, false, 0));
2800 }
2801 }
2803 // Assign locations to all of the incoming aggregate by value arguments.
2804 // Aggregates passed by value are stored in the local variable space of the
2805 // caller's stack frame, right above the parameter list area.
2806 SmallVector<CCValAssign, 16> ByValArgLocs;
2807 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2808 ByValArgLocs, *DAG.getContext());
2810 // Reserve stack space for the allocations in CCInfo.
2811 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
2813 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
2815 // Area that is at least reserved in the caller of this function.
2816 unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
2817 MinReservedArea = std::max(MinReservedArea, LinkageSize);
2819 // Set the size that is at least reserved in caller of this function. Tail
2820 // call optimized function's reserved stack space needs to be aligned so that
2821 // taking the difference between two stack areas will result in an aligned
2822 // stack size.
2823 MinReservedArea =
2824 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
2825 FuncInfo->setMinReservedArea(MinReservedArea);
2827 SmallVector<SDValue, 8> MemOps;
2829 // If the function takes variable number of arguments, make a frame index for
2830 // the start of the first vararg value... for expansion of llvm.va_start.
2831 if (isVarArg) {
2832 static const MCPhysReg GPArgRegs[] = {
2833 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2834 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2835 };
2836 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
2838 static const MCPhysReg FPArgRegs[] = {
2839 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
2840 PPC::F8
2841 };
2842 unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
2843 if (DisablePPCFloatInVariadic)
2844 NumFPArgRegs = 0;
2846 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
2847 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
2849 // Make room for NumGPArgRegs and NumFPArgRegs.
2850 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
2851 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
2853 FuncInfo->setVarArgsStackOffset(
2854 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
2855 CCInfo.getNextStackOffset(), true));
2857 FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
2858 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2860 // The fixed integer arguments of a variadic function are stored to the
2861 // VarArgsFrameIndex on the stack so that they may be loaded by deferencing
2862 // the result of va_next.
2863 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
2864 // Get an existing live-in vreg, or add a new one.
2865 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
2866 if (!VReg)
2867 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
2869 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2870 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2871 MachinePointerInfo(), false, false, 0);
2872 MemOps.push_back(Store);
2873 // Increment the address by four for the next argument to store
2874 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
2875 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
2876 }
2878 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
2879 // is set.
2880 // The double arguments are stored to the VarArgsFrameIndex
2881 // on the stack.
2882 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
2883 // Get an existing live-in vreg, or add a new one.
2884 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
2885 if (!VReg)
2886 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
2888 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
2889 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2890 MachinePointerInfo(), false, false, 0);
2891 MemOps.push_back(Store);
2892 // Increment the address by eight for the next argument to store
2893 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
2894 PtrVT);
2895 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
2896 }
2897 }
2899 if (!MemOps.empty())
2900 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
2902 return Chain;
2903 }
2905 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
2906 // value to MVT::i64 and then truncate to the correct register size.
2907 SDValue
2908 PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
2909 SelectionDAG &DAG, SDValue ArgVal,
2910 SDLoc dl) const {
2911 if (Flags.isSExt())
2912 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
2913 DAG.getValueType(ObjectVT));
2914 else if (Flags.isZExt())
2915 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
2916 DAG.getValueType(ObjectVT));
2918 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
2919 }
2921 SDValue
2922 PPCTargetLowering::LowerFormalArguments_64SVR4(
2923 SDValue Chain,
2924 CallingConv::ID CallConv, bool isVarArg,
2925 const SmallVectorImpl<ISD::InputArg>
2926 &Ins,
2927 SDLoc dl, SelectionDAG &DAG,
2928 SmallVectorImpl<SDValue> &InVals) const {
2929 // TODO: add description of PPC stack frame format, or at least some docs.
2931 bool isELFv2ABI = Subtarget.isELFv2ABI();
2932 bool isLittleEndian = Subtarget.isLittleEndian();
2933 MachineFunction &MF = DAG.getMachineFunction();
2934 MachineFrameInfo *MFI = MF.getFrameInfo();
2935 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2937 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
2938 "fastcc not supported on varargs functions");
2940 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2941 // Potential tail calls could cause overwriting of argument stack slots.
2942 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
2943 (CallConv == CallingConv::Fast));
2944 unsigned PtrByteSize = 8;
2945 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
2947 static const MCPhysReg GPR[] = {
2948 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
2949 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
2950 };
2951 static const MCPhysReg VR[] = {
2952 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
2953 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
2954 };
2955 static const MCPhysReg VSRH[] = {
2956 PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
2957 PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
2958 };
2960 const unsigned Num_GPR_Regs = array_lengthof(GPR);
2961 const unsigned Num_FPR_Regs = 13;
2962 const unsigned Num_VR_Regs = array_lengthof(VR);
2963 const unsigned Num_QFPR_Regs = Num_FPR_Regs;
2965 // Do a first pass over the arguments to determine whether the ABI
2966 // guarantees that our caller has allocated the parameter save area
2967 // on its stack frame. In the ELFv1 ABI, this is always the case;
2968 // in the ELFv2 ABI, it is true if this is a vararg function or if
2969 // any parameter is located in a stack slot.
2971 bool HasParameterArea = !isELFv2ABI || isVarArg;
2972 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
2973 unsigned NumBytes = LinkageSize;
2974 unsigned AvailableFPRs = Num_FPR_Regs;
2975 unsigned AvailableVRs = Num_VR_Regs;
2976 for (unsigned i = 0, e = Ins.size(); i != e; ++i)
2977 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
2978 PtrByteSize, LinkageSize, ParamAreaSize,
2979 NumBytes, AvailableFPRs, AvailableVRs,
2980 Subtarget.hasQPX()))
2981 HasParameterArea = true;
2983 // Add DAG nodes to load the arguments or copy them out of registers. On
2984 // entry to a function on PPC, the arguments start after the linkage area,
2985 // although the first ones are often in registers.
2987 unsigned ArgOffset = LinkageSize;
2988 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
2989 unsigned &QFPR_idx = FPR_idx;
2990 SmallVector<SDValue, 8> MemOps;
2991 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
2992 unsigned CurArgIdx = 0;
2993 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
2994 SDValue ArgVal;
2995 bool needsLoad = false;
2996 EVT ObjectVT = Ins[ArgNo].VT;
2997 EVT OrigVT = Ins[ArgNo].ArgVT;
2998 unsigned ObjSize = ObjectVT.getStoreSize();
2999 unsigned ArgSize = ObjSize;
3000 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3001 if (Ins[ArgNo].isOrigArg()) {
3002 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3003 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3004 }
3005 // We re-align the argument offset for each argument, except when using the
3006 // fast calling convention, when we need to make sure we do that only when
3007 // we'll actually use a stack slot.
3008 unsigned CurArgOffset, Align;
3009 auto ComputeArgOffset = [&]() {
3010 /* Respect alignment of argument on the stack. */
3011 Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
3012 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
3013 CurArgOffset = ArgOffset;
3014 };
3016 if (CallConv != CallingConv::Fast) {
3017 ComputeArgOffset();
3019 /* Compute GPR index associated with argument offset. */
3020 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3021 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
3022 }
3024 // FIXME the codegen can be much improved in some cases.
3025 // We do not have to keep everything in memory.
3026 if (Flags.isByVal()) {
3027 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3029 if (CallConv == CallingConv::Fast)
3030 ComputeArgOffset();
3032 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
3033 ObjSize = Flags.getByValSize();
3034 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3035 // Empty aggregate parameters do not take up registers. Examples:
3036 // struct { } a;
3037 // union { } b;
3038 // int c[0];
3039 // etc. However, we have to provide a place-holder in InVals, so
3040 // pretend we have an 8-byte item at the current address for that
3041 // purpose.
3042 if (!ObjSize) {
3043 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
3044 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3045 InVals.push_back(FIN);
3046 continue;
3047 }
3049 // Create a stack object covering all stack doublewords occupied
3050 // by the argument. If the argument is (fully or partially) on
3051 // the stack, or if the argument is fully in registers but the
3052 // caller has allocated the parameter save area anyway, we can refer
3053 // directly to the caller's stack frame. Otherwise, create a
3054 // local copy in our own frame.
3056 if (HasParameterArea ||
3057 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
3058 FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false, true);
3060 FI = MFI->CreateStackObject(ArgSize, Align, false);
3061 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3063 // Handle aggregates smaller than 8 bytes.
3064 if (ObjSize < PtrByteSize) {
3065 // The value of the object is its address, which differs from the
3066 // address of the enclosing doubleword on big-endian systems.
3067 SDValue Arg = FIN;
3068 if (!isLittleEndian) {
3069 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
3070 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
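// Illustrative example: a 3-byte aggregate in an 8-byte slot occupies the
// last three bytes of the doubleword on big-endian, so its address is the
// slot address plus 8 - 3 = 5; on little-endian it starts at the slot
// address itself and no adjustment is made.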
3072 InVals.push_back(Arg);
3074 if (GPR_idx != Num_GPR_Regs) {
3075 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3076 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3078 SDValue Store;
3079 if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
3080 EVT ObjType = (ObjSize == 1 ? MVT::i8 :
3081 (ObjSize == 2 ? MVT::i16 : MVT::i32));
3082 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
3083 MachinePointerInfo(FuncArg),
3084 ObjType, false, false, 0);
3086 // For sizes that don't fit a truncating store (3, 5, 6, 7),
3087 // store the whole register as-is to the parameter save area
3088 // slot.
3089 Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3090 MachinePointerInfo(FuncArg),
3094 MemOps.push_back(Store);
3096 // Whether we copied from a register or not, advance the offset
3097 // into the parameter save area by a full doubleword.
3098 ArgOffset += PtrByteSize;
3102 // The value of the object is its address, which is the address of
3103 // its first stack doubleword.
3104 InVals.push_back(FIN);
3106 // Store whatever pieces of the object are in registers to memory.
3107 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3108 if (GPR_idx == Num_GPR_Regs)
3109 break;
3111 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3112 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3113 SDValue Addr = FIN;
3114 if (j) {
3115 SDValue Off = DAG.getConstant(j, dl, PtrVT);
3116 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
3118 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
3119 MachinePointerInfo(FuncArg, j),
3121 MemOps.push_back(Store);
3124 ArgOffset += ArgSize;
3128 switch (ObjectVT.getSimpleVT().SimpleTy) {
3129 default: llvm_unreachable("Unhandled argument type!");
3133 // These can be scalar arguments or elements of an integer array type
3134 // passed directly. Clang may use those instead of "byval" aggregate
3135 // types to avoid forcing arguments to memory unnecessarily.
3136 if (GPR_idx != Num_GPR_Regs) {
3137 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3138 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3140 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3141 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3142 // value to MVT::i64 and then truncate to the correct register size.
3143 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3145 if (CallConv == CallingConv::Fast)
3146 ComputeArgOffset();
3148 needsLoad = true;
3149 ArgSize = PtrByteSize;
3151 if (CallConv != CallingConv::Fast || needsLoad)
3152 ArgOffset += 8;
3157 // These can be scalar arguments or elements of a float array type
3158 // passed directly. The latter are used to implement ELFv2 homogeneous
3159 // float aggregates.
3160 if (FPR_idx != Num_FPR_Regs) {
3162 unsigned VReg;
3163 if (ObjectVT == MVT::f32)
3164 VReg = MF.addLiveIn(FPR[FPR_idx],
3165 Subtarget.hasP8Vector()
3166 ? &PPC::VSSRCRegClass
3167 : &PPC::F4RCRegClass);
3168 else
3169 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
3170 ? &PPC::VSFRCRegClass
3171 : &PPC::F8RCRegClass);
3173 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3175 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
3176 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
3177 // once we support fp <-> gpr moves.
3179 // This can only ever happen in the presence of f32 array types,
3180 // since otherwise we never run out of FPRs before running out of GPRs.
3182 unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
3183 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
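// A sketch of what follows (descriptive, not normative): the f32 value
// occupies one 4-byte half of the 8-byte GPR. On big-endian the element at
// an 8-byte-aligned offset sits in the high 32 bits and must be shifted
// down; on little-endian it is the element at offset 4 within the
// doubleword that needs the shift.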
3185 if (ObjectVT == MVT::f32) {
3186 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
3187 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
3188 DAG.getConstant(32, dl, MVT::i32));
3189 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
3192 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
3194 if (CallConv == CallingConv::Fast)
3195 ComputeArgOffset();
3197 needsLoad = true;
3200 // When passing an array of floats, the array occupies consecutive
3201 // space in the argument area; only round up to the next doubleword
3202 // at the end of the array. Otherwise, each float takes 8 bytes.
3203 if (CallConv != CallingConv::Fast || needsLoad) {
3204 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
3205 ArgOffset += ArgSize;
3206 if (Flags.isInConsecutiveRegsLast())
3207 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
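// Illustrative example: three consecutive f32 array elements occupy 4
// bytes each (say at offsets 0, 4 and 8 relative to the start of the
// array), and only after the last one is ArgOffset rounded up from 12 to
// the next doubleword boundary, 16.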
3217 if (!Subtarget.hasQPX()) {
3218 // These can be scalar arguments or elements of a vector array type
3219 // passed directly. The latter are used to implement ELFv2 homogeneous
3220 // vector aggregates.
3221 if (VR_idx != Num_VR_Regs) {
3222 unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
3223 MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
3224 MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3225 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3228 if (CallConv == CallingConv::Fast)
3229 ComputeArgOffset();
3231 needsLoad = true;
3233 if (CallConv != CallingConv::Fast || needsLoad)
3234 ArgOffset += 16;
3238 assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
3239 "Invalid QPX parameter type");
3244 // QPX vectors are treated like their scalar floating-point subregisters
3245 // (except that they're larger).
3246 unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
3247 if (QFPR_idx != Num_QFPR_Regs) {
3248 const TargetRegisterClass *RC;
3249 switch (ObjectVT.getSimpleVT().SimpleTy) {
3250 case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
3251 case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
3252 default: RC = &PPC::QBRCRegClass; break;
3255 unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
3256 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3259 if (CallConv == CallingConv::Fast)
3260 ComputeArgOffset();
3262 needsLoad = true;
3263 if (CallConv != CallingConv::Fast || needsLoad)
3264 ArgOffset += Sz;
3268 // We need to load the argument to a virtual register if we determined
3269 // above that we ran out of physical registers of the appropriate type.
3271 if (ObjSize < ArgSize && !isLittleEndian)
3272 CurArgOffset += ArgSize - ObjSize;
3273 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
3274 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3275 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
3276 false, false, false, 0);
3279 InVals.push_back(ArgVal);
3282 // Area that is at least reserved in the caller of this function.
3283 unsigned MinReservedArea;
3284 if (HasParameterArea)
3285 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
3287 MinReservedArea = LinkageSize;
3289 // Set the size that is at least reserved in caller of this function. Tail
3290 // call optimized functions' reserved stack space needs to be aligned so that
3291 // taking the difference between two stack areas will result in an aligned
3292 // stack size.
3294 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3295 FuncInfo->setMinReservedArea(MinReservedArea);
3297 // If the function takes variable number of arguments, make a frame index for
3298 // the start of the first vararg value... for expansion of llvm.va_start.
3300 int Depth = ArgOffset;
3302 FuncInfo->setVarArgsFrameIndex(
3303 MFI->CreateFixedObject(PtrByteSize, Depth, true));
3304 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3306 // If this function is vararg, store any remaining integer argument regs
3307 // to their spots on the stack so that they may be loaded by dereferencing the
3308 // result of va_next.
3309 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
3310 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
3311 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3312 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3313 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3314 MachinePointerInfo(), false, false, 0);
3315 MemOps.push_back(Store);
3316 // Increment the address by PtrByteSize for the next argument to store
3317 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
3318 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3322 if (!MemOps.empty())
3323 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3329 PPCTargetLowering::LowerFormalArguments_Darwin(
3331 CallingConv::ID CallConv, bool isVarArg,
3332 const SmallVectorImpl<ISD::InputArg>
3334 SDLoc dl, SelectionDAG &DAG,
3335 SmallVectorImpl<SDValue> &InVals) const {
3336 // TODO: add description of PPC stack frame format, or at least some docs.
3338 MachineFunction &MF = DAG.getMachineFunction();
3339 MachineFrameInfo *MFI = MF.getFrameInfo();
3340 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3342 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3343 bool isPPC64 = PtrVT == MVT::i64;
3344 // Potential tail calls could cause overwriting of argument stack slots.
3345 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3346 (CallConv == CallingConv::Fast));
3347 unsigned PtrByteSize = isPPC64 ? 8 : 4;
3348 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3349 unsigned ArgOffset = LinkageSize;
3350 // Area that is at least reserved in caller of this function.
3351 unsigned MinReservedArea = ArgOffset;
3353 static const MCPhysReg GPR_32[] = { // 32-bit registers.
3354 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3355 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3357 static const MCPhysReg GPR_64[] = { // 64-bit registers.
3358 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3359 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3361 static const MCPhysReg VR[] = {
3362 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3363 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3366 const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
3367 const unsigned Num_FPR_Regs = 13;
3368 const unsigned Num_VR_Regs = array_lengthof(VR);
3370 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
3372 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
3374 // In 32-bit non-varargs functions, the stack space for vectors is after the
3375 // stack space for non-vectors. We do not use this space unless we have
3376 // too many vectors to fit in registers, something that only occurs in
3377 // constructed examples, but we have to walk the arglist to figure that
3378 // out... for the pathological case, compute VecArgOffset as the start of
3379 // the vector parameter area. Computing VecArgOffset is the entire point
3380 // of the following loop.
3381 unsigned VecArgOffset = ArgOffset;
3382 if (!isVarArg && !isPPC64) {
3383 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
3384 ++ArgNo) {
3385 EVT ObjectVT = Ins[ArgNo].VT;
3386 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3388 if (Flags.isByVal()) {
3389 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of regs.
3390 unsigned ObjSize = Flags.getByValSize();
3391 unsigned ArgSize =
3392 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3393 VecArgOffset += ArgSize;
3397 switch(ObjectVT.getSimpleVT().SimpleTy) {
3398 default: llvm_unreachable("Unhandled argument type!");
3404 case MVT::i64: // PPC64
3406 // FIXME: We are guaranteed to be !isPPC64 at this point.
3407 // Does MVT::i64 apply?
3414 // Nothing to do, we're only looking at non-vector args here.
3419 // We've found where the vector parameter area in memory is. Skip the
3420 // first 12 parameters; these don't use that memory.
3421 VecArgOffset = ((VecArgOffset+15)/16)*16;
3422 VecArgOffset += 12*16;
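// Illustrative example: if the non-vector arguments end at offset 40,
// VecArgOffset is first rounded up to 48 and then advanced past the first
// twelve 16-byte parameters, 48 + 12*16 = 240.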
3424 // Add DAG nodes to load the arguments or copy them out of registers. On
3425 // entry to a function on PPC, the arguments start after the linkage area,
3426 // although the first ones are often in registers.
3428 SmallVector<SDValue, 8> MemOps;
3429 unsigned nAltivecParamsAtEnd = 0;
3430 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
3431 unsigned CurArgIdx = 0;
3432 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
3434 bool needsLoad = false;
3435 EVT ObjectVT = Ins[ArgNo].VT;
3436 unsigned ObjSize = ObjectVT.getSizeInBits()/8;
3437 unsigned ArgSize = ObjSize;
3438 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
3439 if (Ins[ArgNo].isOrigArg()) {
3440 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
3441 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
3443 unsigned CurArgOffset = ArgOffset;
3445 // Varargs or 64-bit Altivec parameters are padded to a 16-byte boundary.
3446 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
3447 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
3448 if (isVarArg || isPPC64) {
3449 MinReservedArea = ((MinReservedArea+15)/16)*16;
3450 MinReservedArea += CalculateStackSlotSize(ObjectVT,
3453 } else nAltivecParamsAtEnd++;
3455 // Calculate min reserved area.
3456 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
3460 // FIXME the codegen can be much improved in some cases.
3461 // We do not have to keep everything in memory.
3462 if (Flags.isByVal()) {
3463 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
3465 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of registers.
3466 ObjSize = Flags.getByValSize();
3467 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3468 // Objects of size 1 and 2 are right justified, everything else is
3469 // left justified. This means the memory address is adjusted forwards.
3470 if (ObjSize==1 || ObjSize==2) {
3471 CurArgOffset = CurArgOffset + (4 - ObjSize);
3473 // The value of the object is its address.
3474 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true);
3475 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3476 InVals.push_back(FIN);
3477 if (ObjSize==1 || ObjSize==2) {
3478 if (GPR_idx != Num_GPR_Regs) {
3479 unsigned VReg;
3480 if (isPPC64)
3481 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3482 else
3483 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3484 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3485 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
3486 SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
3487 MachinePointerInfo(FuncArg),
3488 ObjType, false, false, 0);
3489 MemOps.push_back(Store);
3493 ArgOffset += PtrByteSize;
3497 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
3498 // Store whatever pieces of the object are in registers
3499 // to memory. ArgOffset will be the address of the beginning
3500 // of the object.
3501 if (GPR_idx != Num_GPR_Regs) {
3502 unsigned VReg;
3503 if (isPPC64)
3504 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3505 else
3506 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3507 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
3508 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3509 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3510 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3511 MachinePointerInfo(FuncArg, j),
3513 MemOps.push_back(Store);
3515 ArgOffset += PtrByteSize;
3517 ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
3524 switch (ObjectVT.getSimpleVT().SimpleTy) {
3525 default: llvm_unreachable("Unhandled argument type!");
3529 if (GPR_idx != Num_GPR_Regs) {
3530 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3531 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3533 if (ObjectVT == MVT::i1)
3534 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
3539 ArgSize = PtrByteSize;
3541 // All int arguments reserve stack space in the Darwin ABI.
3542 ArgOffset += PtrByteSize;
3546 case MVT::i64: // PPC64
3547 if (GPR_idx != Num_GPR_Regs) {
3548 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3549 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
3551 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
3552 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3553 // value to MVT::i64 and then truncate to the correct register size.
3554 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
3559 ArgSize = PtrByteSize;
3561 // All int arguments reserve stack space in the Darwin ABI.
3567 // Every 4 bytes of argument space consumes one of the GPRs available for
3568 // argument passing.
3569 if (GPR_idx != Num_GPR_Regs) {
3570 ++GPR_idx;
3571 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
3572 ++GPR_idx;
3574 if (FPR_idx != Num_FPR_Regs) {
3576 unsigned VReg;
3577 if (ObjectVT == MVT::f32)
3578 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
3579 else
3580 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
3582 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3588 // All FP arguments reserve stack space in the Darwin ABI.
3589 ArgOffset += isPPC64 ? 8 : ObjSize;
3595 // Note that vector arguments in registers don't reserve stack space,
3596 // except in varargs functions.
3597 if (VR_idx != Num_VR_Regs) {
3598 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3599 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3601 while ((ArgOffset % 16) != 0) {
3602 ArgOffset += PtrByteSize;
3603 if (GPR_idx != Num_GPR_Regs)
3604 GPR_idx++;
3607 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
3611 if (!isVarArg && !isPPC64) {
3612 // Vectors go after all the nonvectors.
3613 CurArgOffset = VecArgOffset;
3616 // Vectors are aligned.
3617 ArgOffset = ((ArgOffset+15)/16)*16;
3618 CurArgOffset = ArgOffset;
3626 // We need to load the argument to a virtual register if we determined above
3627 // that we ran out of physical registers of the appropriate type.
3629 int FI = MFI->CreateFixedObject(ObjSize,
3630 CurArgOffset + (ArgSize - ObjSize),
3631 isImmutable);
3632 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3633 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
3634 false, false, false, 0);
3637 InVals.push_back(ArgVal);
3640 // Allow for Altivec parameters at the end, if needed.
3641 if (nAltivecParamsAtEnd) {
3642 MinReservedArea = ((MinReservedArea+15)/16)*16;
3643 MinReservedArea += 16*nAltivecParamsAtEnd;
3646 // Area that is at least reserved in the caller of this function.
3647 MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
3649 // Set the size that is at least reserved in caller of this function. Tail
3650 // call optimized functions' reserved stack space needs to be aligned so that
3651 // taking the difference between two stack areas will result in an aligned
3652 // stack size.
3654 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3655 FuncInfo->setMinReservedArea(MinReservedArea);
3657 // If the function takes variable number of arguments, make a frame index for
3658 // the start of the first vararg value... for expansion of llvm.va_start.
3660 int Depth = ArgOffset;
3662 FuncInfo->setVarArgsFrameIndex(
3663 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
3664 Depth, true));
3665 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3667 // If this function is vararg, store any remaining integer argument regs
3668 // to their spots on the stack so that they may be loaded by dereferencing the
3669 // result of va_next.
3670 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
3671 unsigned VReg;
3673 if (isPPC64)
3674 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3675 else
3676 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3678 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3679 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3680 MachinePointerInfo(), false, false, 0);
3681 MemOps.push_back(Store);
3682 // Increment the address by the pointer size for the next argument to store
3683 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3684 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3688 if (!MemOps.empty())
3689 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3694 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
3695 /// adjusted to accommodate the arguments for the tailcall.
3696 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
3697 unsigned ParamSize) {
3699 if (!isTailCall) return 0;
3701 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3702 unsigned CallerMinReservedArea = FI->getMinReservedArea();
3703 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
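// Illustrative example: a caller that reserved 112 bytes calling a callee
// needing 128 bytes of parameter space gives SPDiff = 112 - 128 = -16; the
// negative delta is remembered below so the stack can be grown accordingly.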
3704 // Remember only if the new adjustment is bigger.
3705 if (SPDiff < FI->getTailCallSPDelta())
3706 FI->setTailCallSPDelta(SPDiff);
3708 return SPDiff;
3711 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
3712 /// for tail call optimization. Targets which want to do tail call
3713 /// optimization should implement this function.
3715 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
3716 CallingConv::ID CalleeCC,
3718 const SmallVectorImpl<ISD::InputArg> &Ins,
3719 SelectionDAG& DAG) const {
3720 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
3721 return false;
3723 // Variable argument functions are not supported.
3724 if (isVarArg)
3725 return false;
3727 MachineFunction &MF = DAG.getMachineFunction();
3728 CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
3729 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
3730 // Functions containing by val parameters are not supported.
3731 for (unsigned i = 0; i != Ins.size(); i++) {
3732 ISD::ArgFlagsTy Flags = Ins[i].Flags;
3733 if (Flags.isByVal()) return false;
3736 // Non-PIC/GOT tail calls are supported.
3737 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
3738 return true;
3740 // At the moment we can only do local tail calls (in same module, hidden
3741 // or protected) if we are generating PIC.
3742 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
3743 return G->getGlobal()->hasHiddenVisibility()
3744 || G->getGlobal()->hasProtectedVisibility();
3746 return false;
3750 /// isBLACompatibleAddress - Return the immediate to use if the specified
3751 /// 32-bit value is representable in the immediate field of a BxA instruction.
3752 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
3753 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
3754 if (!C) return nullptr;
3756 int Addr = C->getZExtValue();
3757 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
3758 SignExtend32<26>(Addr) != Addr)
3759 return nullptr; // Top 6 bits have to be sext of immediate.
3761 return DAG.getConstant((int)C->getZExtValue() >> 2, SDLoc(Op),
3762 DAG.getTargetLoweringInfo().getPointerTy()).getNode();
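// Illustrative examples: Addr = 0x00FFFFFC is word-aligned and survives
// SignExtend32<26>, so the immediate 0x00FFFFFC >> 2 = 0x003FFFFF is
// returned; Addr = 0x12345678 exceeds the signed 26-bit range and yields
// nullptr.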
3767 struct TailCallArgumentInfo {
3768 SDValue Arg;
3769 SDValue FrameIdxOp;
3770 int FrameIdx;
3772 TailCallArgumentInfo() : FrameIdx(0) {}
3777 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
3779 StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
3781 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
3782 SmallVectorImpl<SDValue> &MemOpChains,
3784 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
3785 SDValue Arg = TailCallArgs[i].Arg;
3786 SDValue FIN = TailCallArgs[i].FrameIdxOp;
3787 int FI = TailCallArgs[i].FrameIdx;
3788 // Store relative to framepointer.
3789 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
3790 MachinePointerInfo::getFixedStack(FI),
3795 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
3796 /// the appropriate stack slot for the tail call optimized function call.
3797 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
3798 MachineFunction &MF,
3807 // Calculate the new stack slot for the return address.
3808 int SlotSize = isPPC64 ? 8 : 4;
3809 const PPCFrameLowering *FL =
3810 MF.getSubtarget<PPCSubtarget>().getFrameLowering();
3811 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
3812 int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
3813 NewRetAddrLoc, true);
3814 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
3815 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
3816 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
3817 MachinePointerInfo::getFixedStack(NewRetAddr),
3820 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
3821 // slot as the FP is never overwritten.
3823 int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
3824 int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
3826 SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
3827 Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
3828 MachinePointerInfo::getFixedStack(NewFPIdx),
3835 /// CalculateTailCallArgDest - Remember the argument for later processing. Calculate
3836 /// the position of the argument.
3838 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
3839 SDValue Arg, int SPDiff, unsigned ArgOffset,
3840 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
3841 int Offset = ArgOffset + SPDiff;
3842 uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
3843 int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
3844 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
3845 SDValue FIN = DAG.getFrameIndex(FI, VT);
3846 TailCallArgumentInfo Info;
3847 Info.Arg = Arg;
3848 Info.FrameIdxOp = FIN;
3849 Info.FrameIdx = FI;
3850 TailCallArguments.push_back(Info);
3853 /// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and
3854 /// return address stack slots. Returns the chain as result, with the loaded
3855 /// return address and frame pointer in LROpOut/FPOpOut. Used when tail calling.
3856 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
3864 // Load the LR and FP stack slot for later adjusting.
3865 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
3866 LROpOut = getReturnAddrFrameIndex(DAG);
3867 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
3868 false, false, false, 0);
3869 Chain = SDValue(LROpOut.getNode(), 1);
3871 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
3872 // slot as the FP is never overwritten.
3874 FPOpOut = getFramePointerFrameIndex(DAG);
3875 FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
3876 false, false, false, 0);
3877 Chain = SDValue(FPOpOut.getNode(), 1);
3883 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
3884 /// by "Src" to address "Dst" of size "Size". Alignment information is
3885 /// specified by the specific parameter attribute. The copy will be passed as
3886 /// a byval function parameter.
3887 /// Sometimes what we are copying is the end of a larger object, the part that
3888 /// does not fit in registers.
3890 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
3891 ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
3893 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
3894 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
3895 false, false, false, MachinePointerInfo(),
3896 MachinePointerInfo());
3899 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case
3900 /// of tail calls.
3902 LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
3903 SDValue Arg, SDValue PtrOff, int SPDiff,
3904 unsigned ArgOffset, bool isPPC64, bool isTailCall,
3905 bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
3906 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
3908 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3913 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
3915 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
3916 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
3917 DAG.getConstant(ArgOffset, dl, PtrVT));
3919 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
3920 MachinePointerInfo(), false, false, 0));
3921 // Calculate and remember argument location.
3922 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
3927 void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
3928 SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
3929 SDValue LROp, SDValue FPOp, bool isDarwinABI,
3930 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
3931 MachineFunction &MF = DAG.getMachineFunction();
3933 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
3934 // might overwrite each other in case of tail call optimization.
3935 SmallVector<SDValue, 8> MemOpChains2;
3936 // Do not flag preceding copytoreg stuff together with the following stuff.
3938 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
3940 if (!MemOpChains2.empty())
3941 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
3943 // Store the return address to the appropriate stack slot.
3944 Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
3945 isPPC64, isDarwinABI, dl);
3947 // Emit callseq_end just before tailcall node.
3948 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
3949 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
3950 InFlag = Chain.getValue(1);
3953 // Is this global address that of a function that can be called by name (as
3954 // opposed to something that must hold a descriptor for an indirect call)?
3955 static bool isFunctionGlobalAddress(SDValue Callee) {
3956 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3957 if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
3958 Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
3959 return false;
3961 return G->getGlobal()->getType()->getElementType()->isFunctionTy();
3964 return false;
3968 unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
3969 SDValue &Chain, SDValue CallSeqStart, SDLoc dl, int SPDiff,
3970 bool isTailCall, bool IsPatchPoint,
3971 SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
3972 SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
3973 ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
3975 bool isPPC64 = Subtarget.isPPC64();
3976 bool isSVR4ABI = Subtarget.isSVR4ABI();
3977 bool isELFv2ABI = Subtarget.isELFv2ABI();
3979 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3980 NodeTys.push_back(MVT::Other); // Returns a chain
3981 NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
3983 unsigned CallOpc = PPCISD::CALL;
3985 bool needIndirectCall = true;
3986 if (!isSVR4ABI || !isPPC64)
3987 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
3988 // If this is an absolute destination address, use the munged value.
3989 Callee = SDValue(Dest, 0);
3990 needIndirectCall = false;
3993 if (isFunctionGlobalAddress(Callee)) {
3994 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
3995 // A call to a TLS address is actually an indirect call to a
3996 // thread-specific pointer.
3997 unsigned OpFlags = 0;
3998 if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
3999 (Subtarget.getTargetTriple().isMacOSX() &&
4000 Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
4001 (G->getGlobal()->isDeclaration() ||
4002 G->getGlobal()->isWeakForLinker())) ||
4003 (Subtarget.isTargetELF() && !isPPC64 &&
4004 !G->getGlobal()->hasLocalLinkage() &&
4005 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
4006 // PC-relative references to external symbols should go through $stub,
4007 // unless we're building with the leopard linker or later, which
4008 // automatically synthesizes these stubs.
4009 OpFlags = PPCII::MO_PLT_OR_STUB;
4012 // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
4013 // every direct call is) turn it into a TargetGlobalAddress /
4014 // TargetExternalSymbol node so that legalize doesn't hack it.
4015 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
4016 Callee.getValueType(), 0, OpFlags);
4017 needIndirectCall = false;
4020 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4021 unsigned char OpFlags = 0;
4023 if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
4024 (Subtarget.getTargetTriple().isMacOSX() &&
4025 Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) ||
4026 (Subtarget.isTargetELF() && !isPPC64 &&
4027 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
4028 // PC-relative references to external symbols should go through $stub,
4029 // unless we're building with the leopard linker or later, which
4030 // automatically synthesizes these stubs.
4031 OpFlags = PPCII::MO_PLT_OR_STUB;
4034 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
4036 needIndirectCall = false;
4040 // We'll form an invalid direct call when lowering a patchpoint; the full
4041 // sequence for an indirect call is complicated, and many of the
4042 // instructions introduced might have side effects (and, thus, can't be
4043 // removed later). The call itself will be removed as soon as the
4044 // argument/return lowering is complete, so the fact that it has the wrong
4045 // kind of operands should not really matter.
4046 needIndirectCall = false;
4049 if (needIndirectCall) {
4050 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
4051 // to do the call, we can't use PPCISD::CALL.
4052 SDValue MTCTROps[] = {Chain, Callee, InFlag};
4054 if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
4055 // Function pointers in the 64-bit SVR4 ABI do not point to the function
4056 // entry point, but to the function descriptor (the function entry point
4057 // address is part of the function descriptor though).
4058 // The function descriptor is a three doubleword structure with the
4059 // following fields: function entry point, TOC base address and
4060 // environment pointer.
4061 // Thus for a call through a function pointer, the following actions need
4063 // 1. Save the TOC of the caller in the TOC save area of its stack
4064 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
4065 // 2. Load the address of the function entry point from the function
4066 // descriptor.
4067 // 3. Load the TOC of the callee from the function descriptor into r2.
4068 // 4. Load the environment pointer from the function descriptor into
4069 // r11.
4070 // 5. Branch to the function entry point address.
4071 // 6. On return of the callee, the TOC of the caller needs to be
4072 // restored (this is done in FinishCall()).
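// Viewed as a C struct (an illustrative sketch only; the field names are
// invented), an ELFv1 function descriptor is laid out as:
//   struct FuncDesc {
//     uint64_t EntryPoint; // offset 0, moved into CTR
//     uint64_t TOCBase;    // offset 8, loaded into r2
//     uint64_t EnvPtr;     // offset 16, loaded into r11
//   };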
4074 // The loads are scheduled at the beginning of the call sequence, and the
4075 // register copies are flagged together to ensure that no other
4076 // operations can be scheduled in between. E.g. without flagging the
4077 // copies together, a TOC access in the caller could be scheduled between
4078 // the assignment of the callee TOC and the branch to the callee, which
4079 // results in the TOC access going through the TOC of the callee instead
4080 // of going through the TOC of the caller, which leads to incorrect code.
4082 // Load the address of the function entry point from the function
4083 // descriptor.
4084 SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
4085 if (LDChain.getValueType() == MVT::Glue)
4086 LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
4088 bool LoadsInv = Subtarget.hasInvariantFunctionDescriptors();
4090 MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
4091 SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
4092 false, false, LoadsInv, 8);
4094 // Load environment pointer into r11.
4095 SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
4096 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
4097 SDValue LoadEnvPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddPtr,
4098 MPI.getWithOffset(16), false, false,
4101 SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
4102 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
4103 SDValue TOCPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddTOC,
4104 MPI.getWithOffset(8), false, false,
4107 setUsesTOCBasePtr(DAG);
4108 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
4110 Chain = TOCVal.getValue(0);
4111 InFlag = TOCVal.getValue(1);
4113 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
4116 Chain = EnvVal.getValue(0);
4117 InFlag = EnvVal.getValue(1);
4119 MTCTROps[0] = Chain;
4120 MTCTROps[1] = LoadFuncPtr;
4121 MTCTROps[2] = InFlag;
4124 Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
4125 makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
4126 InFlag = Chain.getValue(1);
4129 NodeTys.push_back(MVT::Other);
4130 NodeTys.push_back(MVT::Glue);
4131 Ops.push_back(Chain);
4132 CallOpc = PPCISD::BCTRL;
4133 Callee.setNode(nullptr);
4134 // Add use of X11 (holding environment pointer)
4135 if (isSVR4ABI && isPPC64 && !isELFv2ABI)
4136 Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
4137 // Add CTR register as callee so a bctr can be emitted later.
4139 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
4142 // If this is a direct call, pass the chain and the callee.
4143 if (Callee.getNode()) {
4144 Ops.push_back(Chain);
4145 Ops.push_back(Callee);
4147 // If this is a tail call add stack pointer delta.
4148 if (isTailCall)
4149 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
4151 // Add argument registers to the end of the list so that they are known live
4152 // into the call.
4153 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4154 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4155 RegsToPass[i].second.getValueType()));
4157 // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
4158 // into the call.
4159 if (isSVR4ABI && isPPC64 && !IsPatchPoint) {
4160 setUsesTOCBasePtr(DAG);
4161 Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
4164 return CallOpc;
4168 bool isLocalCall(const SDValue &Callee)
4170 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4171 return !G->getGlobal()->isDeclaration() &&
4172 !G->getGlobal()->isWeakForLinker();
4174 return false;
4177 PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
4178 CallingConv::ID CallConv, bool isVarArg,
4179 const SmallVectorImpl<ISD::InputArg> &Ins,
4180 SDLoc dl, SelectionDAG &DAG,
4181 SmallVectorImpl<SDValue> &InVals) const {
4183 SmallVector<CCValAssign, 16> RVLocs;
4184 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
4186 CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
4188 // Copy all of the result registers out of their specified physreg.
4189 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4190 CCValAssign &VA = RVLocs[i];
4191 assert(VA.isRegLoc() && "Can only return in registers!");
4193 SDValue Val = DAG.getCopyFromReg(Chain, dl,
4194 VA.getLocReg(), VA.getLocVT(), InFlag);
4195 Chain = Val.getValue(1);
4196 InFlag = Val.getValue(2);
4198 switch (VA.getLocInfo()) {
4199 default: llvm_unreachable("Unknown loc info!");
4200 case CCValAssign::Full: break;
4201 case CCValAssign::AExt:
4202 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4203 break;
4204 case CCValAssign::ZExt:
4205 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
4206 DAG.getValueType(VA.getValVT()));
4207 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4208 break;
4209 case CCValAssign::SExt:
4210 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
4211 DAG.getValueType(VA.getValVT()));
4212 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4213 break;
4216 InVals.push_back(Val);
4219 return Chain;
4223 PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
4224 bool isTailCall, bool isVarArg, bool IsPatchPoint,
4226 SmallVector<std::pair<unsigned, SDValue>, 8>
4228 SDValue InFlag, SDValue Chain,
4229 SDValue CallSeqStart, SDValue &Callee,
4230 int SPDiff, unsigned NumBytes,
4231 const SmallVectorImpl<ISD::InputArg> &Ins,
4232 SmallVectorImpl<SDValue> &InVals,
4233 ImmutableCallSite *CS) const {
4235 std::vector<EVT> NodeTys;
4236 SmallVector<SDValue, 8> Ops;
4237 unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
4238 SPDiff, isTailCall, IsPatchPoint, RegsToPass,
4239 Ops, NodeTys, CS, Subtarget);
4241 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
4242 if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
4243 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
4245 // When performing tail call optimization the callee pops its arguments off
4246 // the stack. Account for this here so these bytes can be pushed back on in
4247 // PPCFrameLowering::eliminateCallFramePseudoInstr.
4248 int BytesCalleePops =
4249 (CallConv == CallingConv::Fast &&
4250 getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
4252 // Add a register mask operand representing the call-preserved registers.
4253 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4254 const uint32_t *Mask =
4255 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
4256 assert(Mask && "Missing call preserved mask for calling convention");
4257 Ops.push_back(DAG.getRegisterMask(Mask));
4259 if (InFlag.getNode())
4260 Ops.push_back(InFlag);
4262 // Emit tail call.
4263 if (isTailCall) {
4264 assert(((Callee.getOpcode() == ISD::Register &&
4265 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
4266 Callee.getOpcode() == ISD::TargetExternalSymbol ||
4267 Callee.getOpcode() == ISD::TargetGlobalAddress ||
4268 isa<ConstantSDNode>(Callee)) &&
4269 "Expecting an global address, external symbol, absolute value or register");
4271 DAG.getMachineFunction().getFrameInfo()->setHasTailCall();
4272 return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
4275 // Add a NOP immediately after the branch instruction when using the 64-bit
4276 // SVR4 ABI. At link time, if caller and callee are in a different module and
4277 // thus have a different TOC, the call will be replaced with a call to a stub
4278 // function which saves the current TOC, loads the TOC of the callee and
4279 // branches to the callee. The NOP will be replaced with a load instruction
4280 // which restores the TOC of the caller from the TOC save slot of the current
4281 // stack frame. If caller and callee belong to the same module (and have the
4282 // same TOC), the NOP will remain unchanged.
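// Illustrative sequence, assuming the ELFv1 TOC save slot at 40(r1):
//   bl callee    // direct call
//   nop          // linker may rewrite to: ld 2, 40(1) (restore caller TOC)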
4284 if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
4286 if (CallOpc == PPCISD::BCTRL) {
4287 // This is a call through a function pointer.
4288 // Restore the caller TOC from the save area into R2.
4289 // See PrepareCall() for more information about calls through function
4290 // pointers in the 64-bit SVR4 ABI.
4291 // We are using a target-specific load with r2 hard coded, because the
4292 // result of a target-independent load would never go directly into r2,
4293 // since r2 is a reserved register (which prevents the register allocator
4294 // from allocating it), resulting in an additional register being
4295 // allocated and an unnecessary move instruction being generated.
4296 CallOpc = PPCISD::BCTRL_LOAD_TOC;
4298 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4299 SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
4300 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
4301 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
4302 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
4304 // The address needs to go after the chain input but before the flag (or
4305 // any other variadic arguments).
4306 Ops.insert(std::next(Ops.begin()), AddTOC);
4307 } else if ((CallOpc == PPCISD::CALL) &&
4308 (!isLocalCall(Callee) ||
4309 DAG.getTarget().getRelocationModel() == Reloc::PIC_))
4310 // Otherwise insert NOP for non-local calls.
4311 CallOpc = PPCISD::CALL_NOP;
4314 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
4315 InFlag = Chain.getValue(1);
4317 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4318 DAG.getIntPtrConstant(BytesCalleePops, dl, true),
4321 InFlag = Chain.getValue(1);
4323 return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
4324 Ins, dl, DAG, InVals);
4328 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
4329 SmallVectorImpl<SDValue> &InVals) const {
4330 SelectionDAG &DAG = CLI.DAG;
4332 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4333 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
4334 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
4335 SDValue Chain = CLI.Chain;
4336 SDValue Callee = CLI.Callee;
4337 bool &isTailCall = CLI.IsTailCall;
4338 CallingConv::ID CallConv = CLI.CallConv;
4339 bool isVarArg = CLI.IsVarArg;
4340 bool IsPatchPoint = CLI.IsPatchPoint;
4341 ImmutableCallSite *CS = CLI.CS;
4344 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
4347 if (!isTailCall && CS && CS->isMustTailCall())
4348 report_fatal_error("failed to perform tail call elimination on a call "
4349 "site marked musttail");
4351 if (Subtarget.isSVR4ABI()) {
4352 if (Subtarget.isPPC64())
4353 return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
4354 isTailCall, IsPatchPoint, Outs, OutVals, Ins,
4355 dl, DAG, InVals, CS);
4357 return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
4358 isTailCall, IsPatchPoint, Outs, OutVals, Ins,
4359 dl, DAG, InVals, CS);
4362 return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
4363 isTailCall, IsPatchPoint, Outs, OutVals, Ins,
4364 dl, DAG, InVals, CS);
4368 PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
4369 CallingConv::ID CallConv, bool isVarArg,
4370 bool isTailCall, bool IsPatchPoint,
4371 const SmallVectorImpl<ISD::OutputArg> &Outs,
4372 const SmallVectorImpl<SDValue> &OutVals,
4373 const SmallVectorImpl<ISD::InputArg> &Ins,
4374 SDLoc dl, SelectionDAG &DAG,
4375 SmallVectorImpl<SDValue> &InVals,
4376 ImmutableCallSite *CS) const {
4377 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
4378 // of the 32-bit SVR4 ABI stack frame layout.
4380 assert((CallConv == CallingConv::C ||
4381 CallConv == CallingConv::Fast) && "Unknown calling convention!");
4383 unsigned PtrByteSize = 4;
4385 MachineFunction &MF = DAG.getMachineFunction();
4387 // Mark this function as potentially containing a function that contains a
4388 // tail call. As a consequence, the frame pointer will be used for dynamic
4389 // allocation and for restoring the caller's stack pointer in this function's
4390 // epilogue. This is done because a tail-called function might overwrite the
4391 // value in this function's (MF) stack pointer stack slot 0(SP).
4392 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4393 CallConv == CallingConv::Fast)
4394 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
4396 // Count how many bytes are to be pushed on the stack, including the linkage
4397 // area, parameter list area and the part of the local variable space which
4398 // contains copies of aggregates which are passed by value.
4400 // Assign locations to all of the outgoing arguments.
4401 SmallVector<CCValAssign, 16> ArgLocs;
4402 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4405 // Reserve space for the linkage area on the stack.
4406 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
4410 // Handle fixed and variable vector arguments differently.
4411 // Fixed vector arguments go into registers as long as registers are
4412 // available. Variable vector arguments always go into memory.
4413 unsigned NumArgs = Outs.size();
4415 for (unsigned i = 0; i != NumArgs; ++i) {
4416 MVT ArgVT = Outs[i].VT;
4417 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
4420 if (Outs[i].IsFixed) {
4421 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
4424 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
4430 errs() << "Call operand #" << i << " has unhandled type "
4431 << EVT(ArgVT).getEVTString() << "\n";
4433 llvm_unreachable(nullptr);
4437 // All arguments are treated the same.
4438 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
4441 // Assign locations to all of the outgoing aggregate by value arguments.
4442 SmallVector<CCValAssign, 16> ByValArgLocs;
4443 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4444 ByValArgLocs, *DAG.getContext());
4446 // Reserve stack space for the allocations in CCInfo.
4447 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
4449 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
4451 // Size of the linkage area, parameter list area, and the part of the local
4452 // variable space where copies of aggregates which are passed by value are
4453 // stored.
4454 unsigned NumBytes = CCByValInfo.getNextStackOffset();
4456 // Calculate by how many bytes the stack has to be adjusted in case of tail
4457 // call optimization.
4458 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
4460 // Adjust the stack pointer for the new arguments...
4461 // These operations are automatically eliminated by the prolog/epilog pass
4462 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4464 SDValue CallSeqStart = Chain;
4466 // Load the return address and frame pointer so they can be moved somewhere
4467 // else later.
4468 SDValue LROp, FPOp;
4469 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
4472 // Set up a copy of the stack pointer for use loading and storing any
4473 // arguments that may not fit in the registers available for argument
4474 // passing.
4475 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4477 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
4478 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
4479 SmallVector<SDValue, 8> MemOpChains;
4481 bool seenFloatArg = false;
4482 // Walk the register/memloc assignments, inserting copies/loads.
4483 for (unsigned i = 0, j = 0, e = ArgLocs.size();
4486 CCValAssign &VA = ArgLocs[i];
4487 SDValue Arg = OutVals[i];
4488 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4490 if (Flags.isByVal()) {
4491 // Argument is an aggregate which is passed by value, thus we need to
4492 // create a copy of it in the local variable space of the current stack
4493 // frame (which is the stack frame of the caller) and pass the address of
4494 // this copy to the callee.
4495 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
4496 CCValAssign &ByValVA = ByValArgLocs[j++];
4497 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
4499 // Memory reserved in the local variable space of the caller's stack frame.
4500 unsigned LocMemOffset = ByValVA.getLocMemOffset();
4502 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
4503 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
4505 // Create a copy of the argument in the local area of the current
4506 // stack frame.
4507 SDValue MemcpyCall =
4508 CreateCopyOfByValArgument(Arg, PtrOff,
4509 CallSeqStart.getNode()->getOperand(0),
4512 // This must go outside the CALLSEQ_START..END.
4513 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
4514 CallSeqStart.getNode()->getOperand(1),
4516 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
4517 NewCallSeqStart.getNode());
4518 Chain = CallSeqStart = NewCallSeqStart;
4520 // Pass the address of the aggregate copy on the stack either in a
4521 // physical register or in the parameter list area of the current stack
4522 // frame to the callee.
4526 if (VA.isRegLoc()) {
4527 if (Arg.getValueType() == MVT::i1)
4528 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
4530 seenFloatArg |= VA.getLocVT().isFloatingPoint();
4531 // Put argument in a physical register.
4532 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
4534 // Put argument in the parameter list area of the current stack frame.
4535 assert(VA.isMemLoc());
4536 unsigned LocMemOffset = VA.getLocMemOffset();
4539 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
4540 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
4542 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
4543 MachinePointerInfo(),
4546 // Calculate and remember argument location.
4547 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
4553 if (!MemOpChains.empty())
4554 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4556 // Build a sequence of copy-to-reg nodes chained together with token chain
4557 // and flag operands which copy the outgoing args into the appropriate regs.
4559 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4560 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4561 RegsToPass[i].second, InFlag);
4562 InFlag = Chain.getValue(1);
4565 // Set CR bit 6 to true if this is a vararg call with floating args passed in
4566 // registers.
4568 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
4569 SDValue Ops[] = { Chain, InFlag };
4571 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
4572 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
4574 InFlag = Chain.getValue(1);
4577 if (isTailCall)
4578 PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
4579 false, TailCallArguments);
4581 return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
4582 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
4583 NumBytes, Ins, InVals, CS);
4586 // Copy an argument into memory, being careful to do this outside the
4587 // call sequence for the call to which the argument belongs.
4589 PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
4590 SDValue CallSeqStart,
4591 ISD::ArgFlagsTy Flags,
4594 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
4595 CallSeqStart.getNode()->getOperand(0),
4597 // The MEMCPY must go outside the CALLSEQ_START..END.
4598 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
4599 CallSeqStart.getNode()->getOperand(1),
4601 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
4602 NewCallSeqStart.getNode());
4603 return NewCallSeqStart;
4607 PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
4608 CallingConv::ID CallConv, bool isVarArg,
4609 bool isTailCall, bool IsPatchPoint,
4610 const SmallVectorImpl<ISD::OutputArg> &Outs,
4611 const SmallVectorImpl<SDValue> &OutVals,
4612 const SmallVectorImpl<ISD::InputArg> &Ins,
4613 SDLoc dl, SelectionDAG &DAG,
4614 SmallVectorImpl<SDValue> &InVals,
4615 ImmutableCallSite *CS) const {
4617 bool isELFv2ABI = Subtarget.isELFv2ABI();
4618 bool isLittleEndian = Subtarget.isLittleEndian();
4619 unsigned NumOps = Outs.size();
4621 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4622 unsigned PtrByteSize = 8;
4624 MachineFunction &MF = DAG.getMachineFunction();
4626 // Mark this function as potentially containing a function that contains a
4627 // tail call. As a consequence, the frame pointer will be used for dynamic
4628 // allocation and for restoring the caller's stack pointer in this function's
4629 // epilogue. This is done because a tail-called function might overwrite the
4630 // value in this function's (MF) stack pointer stack slot 0(SP).
4631 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4632 CallConv == CallingConv::Fast)
4633 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
4635 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4636 "fastcc not supported on varargs functions");
4638 // Count how many bytes are to be pushed on the stack, including the linkage
4639 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
4640 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
4641 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
4642 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4643 unsigned NumBytes = LinkageSize;
4644 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4645 unsigned &QFPR_idx = FPR_idx;
4647 static const MCPhysReg GPR[] = {
4648 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4649 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4651 static const MCPhysReg VR[] = {
4652 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4653 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4655 static const MCPhysReg VSRH[] = {
4656 PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
4657 PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
4660 const unsigned NumGPRs = array_lengthof(GPR);
4661 const unsigned NumFPRs = 13;
4662 const unsigned NumVRs = array_lengthof(VR);
4663 const unsigned NumQFPRs = NumFPRs;
4665 // When using the fast calling convention, we don't provide backing for
4666 // arguments that will be in registers.
4667 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
4669 // Add up all the space actually used.
4670 for (unsigned i = 0; i != NumOps; ++i) {
4671 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4672 EVT ArgVT = Outs[i].VT;
4673 EVT OrigVT = Outs[i].ArgVT;
4675 if (CallConv == CallingConv::Fast) {
4676 if (Flags.isByVal())
4677 NumGPRsUsed += (Flags.getByValSize()+7)/8;
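// Illustrative example: a 12-byte byval argument consumes
// (12 + 8 - 1) / 8 = 2 of the available GPRs under fastcc.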
4679 switch (ArgVT.getSimpleVT().SimpleTy) {
4680 default: llvm_unreachable("Unexpected ValueType for argument!");
4684 if (++NumGPRsUsed <= NumGPRs)
4693 if (++NumVRsUsed <= NumVRs)
4697 // When using QPX, this is handled like an FP register; otherwise, it
4698 // is an Altivec register.
4699 if (Subtarget.hasQPX()) {
4700 if (++NumFPRsUsed <= NumFPRs)
4703 if (++NumVRsUsed <= NumVRs)
4709 case MVT::v4f64: // QPX
4710 case MVT::v4i1: // QPX
4711 if (++NumFPRsUsed <= NumFPRs)
4717 /* Respect alignment of argument on the stack. */
4719 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4720 NumBytes = ((NumBytes + Align - 1) / Align) * Align;
4722 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4723 if (Flags.isInConsecutiveRegsLast())
4724 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4727 unsigned NumBytesActuallyUsed = NumBytes;
  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it is a
  // varargs function.  Because we cannot tell if this is needed on the caller
  // side, we have to conservatively assume that it is needed.  As such, make
  // sure we have at least enough stack space for the caller to store the 8
  // GPRs.
4734 // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
4735 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
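  // For example, this guarantees an ELFv1 callee at least 48 + 8*8 = 112
  // bytes and an ELFv2 callee at least 32 + 8*8 = 96 bytes of stack, even
  // when every argument travels in registers.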
4737 // Tail call needs the stack to be aligned.
4738 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4739 CallConv == CallingConv::Fast)
4740 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
4742 // Calculate by how many bytes the stack has to be adjusted in case of tail
4743 // call optimization.
4744 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
4746 // To protect arguments on the stack from being clobbered in a tail call,
4747 // force all the loads to happen before doing any other lowering.
  if (isTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);
4751 // Adjust the stack pointer for the new arguments...
4752 // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                               dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4768 // Figure out which arguments are going to go in registers, and which in
4769 // memory. Also, if this is a vararg function, floating point operations
4770 // must be stored to our stack, and loaded into integer regs as well, if
4771 // any integer regs are available for argument passing.
4772 unsigned ArgOffset = LinkageSize;
4774 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
4775 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
4777 SmallVector<SDValue, 8> MemOpChains;
4778 for (unsigned i = 0; i != NumOps; ++i) {
4779 SDValue Arg = OutVals[i];
4780 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4781 EVT ArgVT = Outs[i].VT;
4782 EVT OrigVT = Outs[i].ArgVT;
    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    auto ComputePtrOff = [&]() {
      /* Respect alignment of argument on the stack.  */
      unsigned Align =
        CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
      PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    };

    if (CallConv != CallingConv::Fast) {
      ComputePtrOff();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, NumGPRs);
    }
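    // For example, with the 48-byte ELFv1 linkage area, an argument placed at
    // ArgOffset 64 maps to GPR index (64 - 48) / 8 = 2, i.e. register X5.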
4810 // Promote integers to 64-bit values.
4811 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
4812 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
4813 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      // Note: Size includes alignment padding, so
      //   struct x { short a; char b; }
      // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
      // These are the proper values we need for right-justifying the
      // aggregate in a parameter register.
      unsigned Size = Flags.getByValSize();

      // An empty aggregate parameter takes up no storage and no
      // registers.
      if (Size == 0)
        continue;

      if (CallConv == CallingConv::Fast)
        ComputePtrOff();

      // All aggregates smaller than 8 bytes must be passed right-justified.
      if (Size==1 || Size==2 || Size==4) {
        EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT,
                                        false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
          continue;
        }
      }

      if (GPR_idx == NumGPRs && Size < 8) {
        SDValue AddPtr = PtrOff;
        if (!isLittleEndian) {
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
                                          PtrOff.getValueType());
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);
        ArgOffset += PtrByteSize;
        continue;
      }

      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)

      // FIXME: The above statement is likely due to a misunderstanding of the
      // documents.  All arguments must be copied into the parameter area BY
      // THE CALLEE in the event that the callee takes the address of any
      // formal argument.  That has not yet been implemented.  However, it is
      // reasonable to use the stack area as a staging area for the register
      // load.

      // Skip this for small aggregates, as we will use the same slot for a
      // right-justified copy, below.
      if (Size >= 8)
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

      // When a register is available, pass a small aggregate right-justified.
      if (Size < 8 && GPR_idx != NumGPRs) {
        // The easiest way to get this right-justified in a register
        // is to copy the structure into the rightmost portion of a
        // local variable slot, then load the whole slot into the
        // register.
        // FIXME: The memcpy seems to produce pretty awful code for
        // small aggregates, particularly for packed ones.
        // FIXME: It would be preferable to use the slot in the
        // parameter save area instead of a new local variable.
        SDValue AddPtr = PtrOff;
        if (!isLittleEndian) {
          SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

        // Load the slot into the register.
        SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
                                   MachinePointerInfo(),
                                   false, false, false, 0);
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

        // Done with this argument.
        ArgOffset += PtrByteSize;
        continue;
      }

      // For aggregates larger than PtrByteSize, copy the pieces of the
      // object that fit into registers from the parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }
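    // Illustrative note on the right-justification above: on a big-endian
    // target a 3-byte aggregate is memcpy'd to PtrOff + (8 - 3) = PtrOff + 5,
    // so the subsequent 8-byte load leaves its bytes in the low-order end of
    // the GPR, as the 64-bit ELF ABI expects for small aggregates.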
    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
        if (CallConv == CallingConv::Fast)
          ArgOffset += PtrByteSize;
      }
      if (CallConv != CallingConv::Fast)
        ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64: {
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogeneous
      // float aggregates.

      // Named arguments go into FPRs first, and once they overflow, the
      // remaining arguments go into GPRs and then the parameter save area.
      // Unnamed arguments for vararg functions always go to GPRs and
      // then the parameter save area.  For now, put all arguments to vararg
      // routines always in both locations (FPR *and* GPR or stack slot).
      bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
      bool NeededLoad = false;

      // First load the argument into the next available FPR.
      if (FPR_idx != NumFPRs)
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

      // Next, load the argument into GPR or stack slot if needed.
      if (!NeedGPROrStack)
        ;
      else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // In the non-vararg case, this can only ever happen in the
        // presence of f32 array types, since otherwise we never run
        // out of FPRs before running out of GPRs.
        SDValue ArgVal;

        // Double values are always passed in a single GPR.
        if (Arg.getValueType() != MVT::f32) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);

        // Non-array float values are extended and passed in a GPR.
        } else if (!Flags.isInConsecutiveRegs()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);

        // If we have an array of floats, we collect every odd element
        // together with its predecessor into one GPR.
        } else if (ArgOffset % PtrByteSize != 0) {
          SDValue Lo, Hi;
          Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
          Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          if (!isLittleEndian)
            std::swap(Lo, Hi);
          ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);

        // The final element, if even, goes into the first half of a GPR.
        } else if (Flags.isInConsecutiveRegsLast()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
          if (!isLittleEndian)
            ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));

        // Non-final even elements are skipped; they will be handled
        // together with the subsequent argument on the next go-around.
        } else
          ArgVal = SDValue();

        if (ArgVal.getNode())
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        // Single-precision floating-point values are mapped to the
        // second (rightmost) word of the stack doubleword.
        if (Arg.getValueType() == MVT::f32 &&
            !isLittleEndian && !Flags.isInConsecutiveRegs()) {
          SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
          PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
        }

        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);

        NeededLoad = true;
      }
      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (CallConv != CallingConv::Fast || NeededLoad) {
        ArgOffset += (Arg.getValueType() == MVT::f32 &&
                      Flags.isInConsecutiveRegs()) ? 4 : 8;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;
    }
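    // Example: for a homogeneous float aggregate {float a, b, c} passed in
    // GPRs, 'a' and 'b' are combined into one doubleword via BUILD_PAIR
    // above, 'c' (the final, even element) lands in the first word of the
    // next GPR, and the offset is then rounded up to a doubleword boundary.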
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
      if (!Subtarget.hasQPX()) {
      // These can be scalar arguments or elements of a vector array type
      // passed directly.  The latter are used to implement ELFv2 homogeneous
      // vector aggregates.

      // For a varargs call, named arguments go into VRs or on the stack as
      // usual; unnamed arguments always go to the stack or the corresponding
      // GPRs when within range.  For now, we always put the value in both
      // locations (or even all three).
      if (isVarArg) {
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
                                     MachinePointerInfo(), false, false, 0);
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));

          unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
                           Arg.getSimpleValueType() == MVT::v2i64) ?
                          VSRH[VR_idx] : VR[VR_idx];
          ++VR_idx;

          RegsToPass.push_back(std::make_pair(VReg, Load));
        }
        ArgOffset += 16;
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, dl, PtrVT));
          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params go into VRs or on the stack.
      if (VR_idx != NumVRs) {
        unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
                         Arg.getSimpleValueType() == MVT::v2i64) ?
                        VSRH[VR_idx] : VR[VR_idx];
        ++VR_idx;

        RegsToPass.push_back(std::make_pair(VReg, Arg));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        if (CallConv == CallingConv::Fast)
          ArgOffset += 16;
      }

      if (CallConv != CallingConv::Fast)
        ArgOffset += 16;
      break;
      } // not QPX

      assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
             "Invalid QPX parameter type");

      /* fall through */
    case MVT::v4f64:
    case MVT::v4i1: {
      bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
      if (isVarArg) {
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
                                     MachinePointerInfo(), false, false, 0);
        MemOpChains.push_back(Store);
        if (QFPR_idx != NumQFPRs) {
          SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl,
                                     Store, PtrOff, MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
        }
        ArgOffset += (IsF32 ? 16 : 32);
        for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, dl, PtrVT));
          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs QPX params go into registers or on the stack.
      if (QFPR_idx != NumQFPRs) {
        RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        if (CallConv == CallingConv::Fast)
          ArgOffset += (IsF32 ? 16 : 32);
      }

      if (CallConv != CallingConv::Fast)
        ArgOffset += (IsF32 ? 16 : 32);
      break;
      }
    }
  }
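  // Note: a QPX value shadowed to the parameter save area spans two (v4f32)
  // or four (v4f64/v4i1) pointer-sized slots, which is why the loops above
  // walk 16 or 32 bytes in PtrByteSize strides when feeding GPRs.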
5177 assert(NumBytesActuallyUsed == ArgOffset);
5178 (void)NumBytesActuallyUsed;
5180 if (!MemOpChains.empty())
5181 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5183 // Check if this is an indirect call (MTCTR/BCTRL).
5184 // See PrepareCall() for more information about calls through function
5185 // pointers in the 64-bit SVR4 ABI.
5186 if (!isTailCall && !IsPatchPoint &&
5187 !isFunctionGlobalAddress(Callee) &&
5188 !isa<ExternalSymbolSDNode>(Callee)) {
5189 // Load r2 into a virtual register and store it to the TOC save area.
5190 setUsesTOCBasePtr(DAG);
5191 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
5192 // TOC save area offset.
5193 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5194 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5195 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5196 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
                         MachinePointerInfo::getStack(TOCSaveOffset),
                         false, false, 0);
5199 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
5200 // This does not mean the MTCTR instruction must use R12; it's easier
5201 // to model this as an extra parameter, so do that.
5202 if (isELFv2ABI && !IsPatchPoint)
      RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
  }
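  // (The TOC save slot written above sits at a fixed offset within the
  // caller's linkage area -- 40(r1) under ELFv1 and 24(r1) under ELFv2 --
  // which is the value getTOCSaveOffset() reports.)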
5206 // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
5209 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5210 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5211 RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
                    FPOp, true, TailCallArguments);
5219 return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
5220 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
                    NumBytes, Ins, InVals, CS);
}

SDValue
PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
5226 CallingConv::ID CallConv, bool isVarArg,
5227 bool isTailCall, bool IsPatchPoint,
5228 const SmallVectorImpl<ISD::OutputArg> &Outs,
5229 const SmallVectorImpl<SDValue> &OutVals,
5230 const SmallVectorImpl<ISD::InputArg> &Ins,
5231 SDLoc dl, SelectionDAG &DAG,
5232 SmallVectorImpl<SDValue> &InVals,
5233 ImmutableCallSite *CS) const {
5235 unsigned NumOps = Outs.size();
5237 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
5238 bool isPPC64 = PtrVT == MVT::i64;
5239 unsigned PtrByteSize = isPPC64 ? 8 : 4;
5241 MachineFunction &MF = DAG.getMachineFunction();
  // Mark this function as potentially containing a call that may be
  // tail-called.  As a consequence, the frame pointer will be used for
  // dynamic stack allocation and for restoring the caller's stack pointer in
  // this function's epilog, because the tail-called function might overwrite
  // the value in this function's (MF) stack pointer stack slot 0(SP).
5248 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5249 CallConv == CallingConv::Fast)
5250 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5252 // Count how many bytes are to be pushed on the stack, including the linkage
5253 // area, and parameter passing area. We start with 24/48 bytes, which is
5254 // prereserved space for [SP][CR][LR][3 x unused].
5255 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5256 unsigned NumBytes = LinkageSize;
5258 // Add up all the space actually used.
5259 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
5260 // they all go in registers, but we must reserve stack space for them for
5261 // possible use by the caller. In varargs or 64-bit calls, parameters are
  // assigned stack space in order, with padding so Altivec parameters are
  // 16-byte aligned.
  unsigned nAltivecParamsAtEnd = 0;
5265 for (unsigned i = 0; i != NumOps; ++i) {
5266 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5267 EVT ArgVT = Outs[i].VT;
5268 // Varargs Altivec parameters are padded to a 16 byte boundary.
5269 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
5270 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
5271 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
5272 if (!isVarArg && !isPPC64) {
5273 // Non-varargs Altivec parameters go after all the non-Altivec
5274 // parameters; handle those later so we know how much padding we need.
        nAltivecParamsAtEnd++;
        continue;
      }
      // Varargs and 64-bit Altivec parameters are padded to a 16 byte boundary.
      NumBytes = ((NumBytes+15)/16)*16;
    }
    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  }
5284 // Allow for Altivec parameters at the end, if needed.
5285 if (nAltivecParamsAtEnd) {
5286 NumBytes = ((NumBytes+15)/16)*16;
    NumBytes += 16*nAltivecParamsAtEnd;
  }
  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it is a
  // varargs function.  Because we cannot tell if this is needed on the caller
  // side, we have to conservatively assume that it is needed.  As such, make
  // sure we have at least enough stack space for the caller to store the 8
  // GPRs.
5295 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
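  // For example, a 32-bit Darwin callee is always given at least
  // 24 + 8*4 = 56 bytes of stack, and a 64-bit callee 48 + 8*8 = 112 bytes,
  // regardless of how many arguments actually spill to memory.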
5297 // Tail call needs the stack to be aligned.
5298 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5299 CallConv == CallingConv::Fast)
5300 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5302 // Calculate by how many bytes the stack has to be adjusted in case of tail
5303 // call optimization.
5304 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5306 // To protect arguments on the stack from being clobbered in a tail call,
5307 // force all the loads to happen before doing any other lowering.
  if (isTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);
5311 // Adjust the stack pointer for the new arguments...
5312 // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                               dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr;
  if (isPPC64)
    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
  else
    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5332 // Figure out which arguments are going to go in registers, and which in
5333 // memory. Also, if this is a vararg function, floating point operations
5334 // must be stored to our stack, and loaded into integer regs as well, if
5335 // any integer regs are available for argument passing.
5336 unsigned ArgOffset = LinkageSize;
5337 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
5351 const unsigned NumGPRs = array_lengthof(GPR_32);
5352 const unsigned NumFPRs = 13;
5353 const unsigned NumVRs = array_lengthof(VR);
5355 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
5357 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5358 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5360 SmallVector<SDValue, 8> MemOpChains;
5361 for (unsigned i = 0; i != NumOps; ++i) {
5362 SDValue Arg = OutVals[i];
5363 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5365 // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());

    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5373 // On PPC64, promote integers to 64-bit values.
5374 if (isPPC64 && Arg.getValueType() == MVT::i32) {
5375 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5376 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      unsigned Size = Flags.getByValSize();
      // Very small objects are passed right-justified.  Everything else is
      // passed left-justified.
      if (Size==1 || Size==2) {
        EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT,
                                        false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
        } else {
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
                                          PtrOff.getValueType());
          SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
          Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                            CallSeqStart,
                                                            Flags, DAG, dl);
          ArgOffset += PtrByteSize;
        }
        continue;
      }
      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)
      Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                        CallSeqStart,
                                                        Flags, DAG, dl);

      // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
      // copy the pieces of the object that fit into registers from the
      // parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }
    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (GPR_idx != NumGPRs) {
        if (Arg.getValueType() == MVT::i1)
          Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);

        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      }
      ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64:
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        if (isVarArg) {
          SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
                                       MachinePointerInfo(), false, false, 0);
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers
          if (GPR_idx != NumGPRs) {
            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
                                       MachinePointerInfo(), false, false,
                                       false, 0);
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
              !isPPC64) {
            SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
            PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
                                       MachinePointerInfo(),
                                       false, false, false, 0);
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
        } else {
          // If we have any FPRs remaining, we may also have GPRs remaining.
          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
          // GPRs.
          if (GPR_idx != NumGPRs)
            ++GPR_idx;
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
              !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
            ++GPR_idx;
        }
      } else
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      if (isPPC64)
        ArgOffset += 8;
      else
        ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (isVarArg) {
        // These go aligned on the stack, or in the corresponding R registers
        // when within range.  The Darwin PPC ABI doc claims they also go in
        // V registers; in fact gcc does this only for arguments that are
        // prototyped, not for those that match the ...  We do it for all
        // arguments, seems to work.
        while (ArgOffset % 16 != 0) {
          ArgOffset += PtrByteSize;
          if (GPR_idx != NumGPRs)
            GPR_idx++;
        }
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                             DAG.getConstant(ArgOffset, dl, PtrVT));
        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
                                     MachinePointerInfo(), false, false, 0);
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, dl, PtrVT));
          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params generally go in registers, but have
      // stack space allocated at the end.
      if (VR_idx != NumVRs) {
        // Doesn't have GPR space allocated.
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else if (nAltivecParamsAtEnd==0) {
        // We are emitting Altivec params in order.
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        ArgOffset += 16;
      }
      break;
    }
  }
5557 // If all Altivec parameters fit in registers, as they usually do,
5558 // they get stack space following the non-Altivec parameters. We
5559 // don't track this here because nobody below needs it.
  // If there are more Altivec parameters than fit in registers emit
  // the stores to memory at the end.
  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
    unsigned j = 0;
    // Offset is aligned; skip 1st 12 params which go in V registers.
    ArgOffset = ((ArgOffset+15)/16)*16;
    ArgOffset += 12*16;
5567 for (unsigned i = 0; i != NumOps; ++i) {
5568 SDValue Arg = OutVals[i];
5569 EVT ArgType = Outs[i].VT;
5570 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
          ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
        if (++j > NumVRs) {
          SDValue PtrOff;
          // We are emitting Altivec params in order.
          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                           isPPC64, isTailCall, true, MemOpChains,
                           TailCallArguments, dl);
          ArgOffset += 16;
        }
      }
    }
  }
5584 if (!MemOpChains.empty())
5585 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5587 // On Darwin, R12 must contain the address of an indirect callee. This does
5588 // not mean the MTCTR instruction must use R12; it's easier to model this as
  // an extra parameter, so do that.
  if (!isTailCall &&
      !isFunctionGlobalAddress(Callee) &&
5592 !isa<ExternalSymbolSDNode>(Callee) &&
5593 !isBLACompatibleAddress(Callee, DAG))
5594 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
5595 PPC::R12), Callee));
5597 // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
5600 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5601 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5602 RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
                    FPOp, true, TailCallArguments);
5610 return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
5611 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
                    NumBytes, Ins, InVals, CS);
}

bool
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
5617 MachineFunction &MF, bool isVarArg,
5618 const SmallVectorImpl<ISD::OutputArg> &Outs,
5619 LLVMContext &Context) const {
5620 SmallVector<CCValAssign, 16> RVLocs;
5621 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC_PPC);
}

SDValue
PPCTargetLowering::LowerReturn(SDValue Chain,
5627 CallingConv::ID CallConv, bool isVarArg,
5628 const SmallVectorImpl<ISD::OutputArg> &Outs,
5629 const SmallVectorImpl<SDValue> &OutVals,
5630 SDLoc dl, SelectionDAG &DAG) const {
5632 SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_PPC);

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);
5640 // Copy the result values into the output registers.
5641 for (unsigned i = 0; i != RVLocs.size(); ++i) {
5642 CCValAssign &VA = RVLocs[i];
5643 assert(VA.isRegLoc() && "Can only return in registers!");
5645 SDValue Arg = OutVals[i];
5647 switch (VA.getLocInfo()) {
5648 default: llvm_unreachable("Unknown loc info!");
5649 case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }

    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
5662 Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
}
5675 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
5676 const PPCSubtarget &Subtarget) const {
  // When we pop the dynamic allocation we need to restore the SP link.
  SDLoc dl(Op);

  // Get the correct type for pointers.
5681 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
5683 // Construct the stack pointer operand.
5684 bool isPPC64 = Subtarget.isPPC64();
5685 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
5686 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
5688 // Get the operands for the STACKRESTORE.
5689 SDValue Chain = Op.getOperand(0);
5690 SDValue SaveSP = Op.getOperand(1);
5692 // Load the old link SP.
5693 SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
5694 MachinePointerInfo(),
5695 false, false, false, 0);
5697 // Restore the stack pointer.
5698 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
5700 // Store the old link SP.
  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
                      false, false, 0);
}

SDValue
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
5709 MachineFunction &MF = DAG.getMachineFunction();
5710 bool isPPC64 = Subtarget.isPPC64();
5711 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Get the current return address save index.  The users of this index will
  // be primarily DYNALLOC instructions.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int RASI = FI->getReturnAddrSaveIndex();

  // If the return address save index hasn't been defined yet.
  if (!RASI) {
    // Find out what the fixed offset of the return address save area is.
    int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
    // Allocate the frame index for the return address save area.
    RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset,
                                                false);
    // Save the result.
    FI->setReturnAddrSaveIndex(RASI);
  }
  return DAG.getFrameIndex(RASI, PtrVT);
}
SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
5732 MachineFunction &MF = DAG.getMachineFunction();
5733 bool isPPC64 = Subtarget.isPPC64();
5734 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
5736 // Get current frame pointer save index. The users of this index will be
5737 // primarily DYNALLOC instructions.
5738 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int FPSI = FI->getFramePointerSaveIndex();

  // If the frame pointer save index hasn't been defined yet.
  if (!FPSI) {
    // Find out what the fixed offset of the frame pointer save area is.
    int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
    // Allocate the frame index for the frame pointer save area.
    FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset,
                                                true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }
  return DAG.getFrameIndex(FPSI, PtrVT);
}
SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG,
                                                   const PPCSubtarget &Subtarget) const {
5757 SDValue Chain = Op.getOperand(0);
  SDValue Size  = Op.getOperand(1);
  SDLoc dl(Op);

  // Get the correct type for pointers.
5762 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
5764 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
5765 DAG.getConstant(0, dl, PtrVT), Size);
5766 // Construct a node for the frame pointer save index.
5767 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
5768 // Build a DYNALLOC node.
5769 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
5770 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
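  // The DYNALLOC node bundles the negated allocation size with the frame
  // index of the frame-pointer save slot; it is later expanded into the
  // actual SP adjustment that keeps the back-chain word at 0(SP) intact.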
  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
}
5774 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
                     DAG.getVTList(MVT::i32, MVT::Other),
                     Op.getOperand(0), Op.getOperand(1));
}
5782 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
                     Op.getOperand(0), Op.getOperand(1));
}
5789 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
5790 if (Op.getValueType().isVector())
5791 return LowerVectorLoad(Op, DAG);
5793 assert(Op.getValueType() == MVT::i1 &&
5794 "Custom lowering only for i1 loads");
  // First, load 8 bits into 32 bits, then truncate to 1 bit.

  SDLoc dl(Op);
  LoadSDNode *LD = cast<LoadSDNode>(Op);
5801 SDValue Chain = LD->getChain();
5802 SDValue BasePtr = LD->getBasePtr();
5803 MachineMemOperand *MMO = LD->getMemOperand();
5805 SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain,
5806 BasePtr, MVT::i8, MMO);
5807 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
5809 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
  return DAG.getMergeValues(Ops, dl);
}
5813 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
5814 if (Op.getOperand(1).getValueType().isVector())
5815 return LowerVectorStore(Op, DAG);
5817 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
5818 "Custom lowering only for i1 stores");
  // First, zero extend to 32 bits, then use a truncating store to 8 bits.

  SDLoc dl(Op);
  StoreSDNode *ST = cast<StoreSDNode>(Op);
5825 SDValue Chain = ST->getChain();
5826 SDValue BasePtr = ST->getBasePtr();
5827 SDValue Value = ST->getValue();
5828 MachineMemOperand *MMO = ST->getMemOperand();
5830 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value);
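  // In effect, an IR store such as "store i1 %b, i1* %p" is emitted as a
  // zero extension of %b followed by an 8-bit truncating store, so the i1
  // occupies a full byte in memory.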
  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
}
5834 // FIXME: Remove this once the ANDI glue bug is fixed:
5835 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
5836 assert(Op.getValueType() == MVT::i1 &&
5837 "Custom lowering only for i1 results");
5840 return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
5846 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
5847 // Not FP? Not a fsel.
5848 if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
      !Op.getOperand(2).getValueType().isFloatingPoint())
    return Op;
5852 // We might be able to do better than this under some circumstances, but in
5853 // general, fsel-based lowering of select is a finite-math-only optimization.
5854 // For more information, see section F.3 of the 2.06 ISA specification.
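  // Roughly, fsel computes "A >= 0.0 ? B : C"; a SETLT select_cc thus becomes
  // fsel(LHS - RHS, FV, TV): when LHS - RHS is non-negative, LHS < RHS is
  // false and FV is chosen.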
5855 if (!DAG.getTarget().Options.NoInfsFPMath ||
      !DAG.getTarget().Options.NoNaNsFPMath)
    return Op;
5859 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5861 EVT ResVT = Op.getValueType();
5862 EVT CmpVT = Op.getOperand(0).getValueType();
5863 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
5864 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
5867 // If the RHS of the comparison is a 0.0, we don't need to do the
5868 // subtraction at all.
5870 if (isFloatingPointZero(RHS))
5872 default: break; // SETUO etc aren't handled by fsel.
5876 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
5877 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
5878 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
5879 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
5880 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
5881 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
5882 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
5885 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
5888 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
5889 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
5890 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
5893 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
5896 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
5897 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
5898 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
5899 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
5904 default: break; // SETUO etc aren't handled by fsel.
5908 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
5909 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
5910 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
5911 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
5912 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
5913 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
5914 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
5915 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
5918 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
5919 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
5920 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
5921 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
5924 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
5925 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
5926 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
5927 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
5930 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
5931 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
5932 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
5933 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
5936 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
5937 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
5938 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
5939 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               SDLoc dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
5948 SDValue Src = Op.getOperand(0);
5949 if (Src.getValueType() == MVT::f32)
5950 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
  SDValue Tmp;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(
        Op.getOpcode() == ISD::FP_TO_SINT
            ? PPCISD::FCTIWZ
            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
        dl, MVT::f64, Src);
    break;
  case MVT::i64:
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ,
                      dl, MVT::f64, Src);
    break;
  }
5971 // Convert the FP value to an int value through memory.
5972 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
5973 (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
5974 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
5975 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
5976 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
  // Emit a store to the stack slot.
  SDValue Chain;
  if (i32Stack) {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MMO =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
    SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
  } else
    Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
                         MPI, false, false, 0);
  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
  // add in a bias.
  if (Op.getValueType() == MVT::i32 && !i32Stack) {
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, dl, FIPtr.getValueType()));
    MPI = MPI.getWithOffset(4);
  }

  RLI.Chain = Chain;
  RLI.Ptr = FIPtr;
  RLI.MPI = MPI;
}
6004 /// \brief Custom lowers floating point to integer conversions to use
6005 /// the direct move instructions available in ISA 2.07 to avoid the
6006 /// need for load/store combinations.
SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
                                                    SelectionDAG &DAG,
                                                    SDLoc dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
6011 SDValue Src = Op.getOperand(0);
6013 if (Src.getValueType() == MVT::f32)
6014 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
  SDValue Tmp;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(
        Op.getOpcode() == ISD::FP_TO_SINT
            ? PPCISD::FCTIWZ
            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
        dl, MVT::f64, Src);
    Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
    break;
  case MVT::i64:
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ,
                      dl, MVT::f64, Src);
    Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
    break;
  }
  return Tmp;
}
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                          SDLoc dl) const {
  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
    return LowerFP_TO_INTDirectMove(Op, DAG, dl);

  ReuseLoadInfo RLI;
  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
                     false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
                     RLI.Ranges);
}
6052 // We're trying to insert a regular store, S, and then a load, L. If the
6053 // incoming value, O, is a load, we might just be able to have our load use the
6054 // address used by O. However, we don't know if anything else will store to
6055 // that address before we can load from it. To prevent this situation, we need
6056 // to insert our load, L, into the chain as a peer of O. To do this, we give L
6057 // the same chain operand as O, we create a token factor from the chain results
6058 // of O and L, and we replace all uses of O's chain result with that token
6059 // factor (see spliceIntoChain below for this last part).
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
                                            ReuseLoadInfo &RLI,
                                            SelectionDAG &DAG,
                                            ISD::LoadExtType ET) const {
  SDLoc dl(Op);
  if (ET == ISD::NON_EXTLOAD &&
      (Op.getOpcode() == ISD::FP_TO_UINT ||
       Op.getOpcode() == ISD::FP_TO_SINT) &&
      isOperationLegalOrCustom(Op.getOpcode(),
                               Op.getOperand(0).getValueType())) {

    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
    return true;
  }

  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
      LD->isNonTemporal())
    return false;
  if (LD->getMemoryVT() != MemVT)
    return false;

  RLI.Ptr = LD->getBasePtr();
  if (LD->isIndexed() && LD->getOffset().getOpcode() != ISD::UNDEF) {
    assert(LD->getAddressingMode() == ISD::PRE_INC &&
           "Non-pre-inc AM on PPC?");
    RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
                          LD->getOffset());
  }

  RLI.Chain = LD->getChain();
  RLI.MPI = LD->getPointerInfo();
  RLI.IsInvariant = LD->isInvariant();
  RLI.Alignment = LD->getAlignment();
  RLI.AAInfo = LD->getAAInfo();
  RLI.Ranges = LD->getRanges();

  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
  return true;
}
// Given the head of the old chain, ResChain, insert a token factor containing
// it and NewResChain, and make users of ResChain now be users of that token
// factor.
void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
                                        SDValue NewResChain,
                                        SelectionDAG &DAG) const {
  if (!ResChain)
    return;

  SDLoc dl(NewResChain);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                           NewResChain, DAG.getUNDEF(MVT::Other));
  assert(TF.getNode() != NewResChain.getNode() &&
         "A new TF really is required here");

  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}
6121 /// \brief Custom lowers integer to floating point conversions to use
6122 /// the direct move instructions available in ISA 2.07 to avoid the
6123 /// need for load/store combinations.
SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
                                                    SelectionDAG &DAG,
                                                    SDLoc dl) const {
  assert((Op.getValueType() == MVT::f32 ||
          Op.getValueType() == MVT::f64) &&
         "Invalid floating point type as target of conversion");
  assert(Subtarget.hasFPCVT() &&
         "Int to FP conversions with direct moves require FPCVT");

  SDValue FP;
  SDValue Src = Op.getOperand(0);
  bool SinglePrec = Op.getValueType() == MVT::f32;
  bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
  bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
  unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
                             (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);

  if (WordInt) {
    FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
                     dl, MVT::f64, Src);
    FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
  } else {
    FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
    FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
  }

  return FP;
}
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);

  if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
    if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
      return SDValue();

    SDValue Value = Op.getOperand(0);
    // The values are now known to be -1 (false) or 1 (true).  To convert this
    // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by
    // 0.5).  This can be done with an fma and the 0.5 constant:
    // (V+1.0)*0.5 = 0.5*V+0.5
    Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

    SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64);
    FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
                          FPHalfs, FPHalfs, FPHalfs, FPHalfs);

    Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

    if (Op.getValueType() != MVT::v4f64)
      Value = DAG.getNode(ISD::FP_ROUND, dl,
                          Op.getValueType(), Value,
                          DAG.getIntPtrConstant(1, dl));
    return Value;
  }
6180 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();
6184 if (Op.getOperand(0).getValueType() == MVT::i1)
6185 return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
6186 DAG.getConstantFP(1.0, dl, Op.getValueType()),
6187 DAG.getConstantFP(0.0, dl, Op.getValueType()));
6189 // If we have direct moves, we can do all the conversion, skip the store/load
6190 // however, without FPCVT we can't do most conversions.
6191 if (Subtarget.hasDirectMove() && Subtarget.isPPC64() && Subtarget.hasFPCVT())
6192 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
6194 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
6195 "UINT_TO_FP is supported only with FPCVT");
6197 // If we have FCFIDS, then use it when converting to single-precision.
6198 // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;
6208 if (Op.getOperand(0).getValueType() == MVT::i64) {
6209 SDValue SINT = Op.getOperand(0);
6210 // When converting to single-precision, we actually need to convert
6211 // to double-precision first and then round to single-precision.
6212 // To avoid double-rounding effects during that operation, we have
6213 // to prepare the input operand. Bits that might be truncated when
6214 // converting to double-precision are replaced by a bit that won't
6215 // be lost at this stage, but is below the single-precision rounding
6218 // However, if -enable-unsafe-fp-math is in effect, accept double
6219 // rounding to avoid the extra overhead.
6220 if (Op.getValueType() == MVT::f32 &&
6221 !Subtarget.hasFPCVT() &&
6222 !DAG.getTarget().Options.UnsafeFPMath) {
6224 // Twiddle input to make sure the low 11 bits are zero. (If this
6225 // is the case, we are guaranteed the value will fit into the 53 bit
6226 // mantissa of an IEEE double-precision value without rounding.)
6227 // If any of those low 11 bits were not zero originally, make sure
6228 // bit 12 (value 2048) is set instead, so that the final rounding
6229 // to single-precision gets the correct result.
6230 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
6231 SINT, DAG.getConstant(2047, dl, MVT::i64));
6232 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
6233 Round, DAG.getConstant(2047, dl, MVT::i64));
6234 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
6235 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
6236 Round, DAG.getConstant(-2048, dl, MVT::i64));
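      // Worked example: SINT = 0x1000000000000001 has nonzero low bits, so
      // Round becomes 0x1000000000000800: bit 11 is set, bits 0-10 are
      // cleared, and the value now fits in a 53-bit mantissa exactly.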
6238 // However, we cannot use that value unconditionally: if the magnitude
6239 // of the input value is small, the bit-twiddling we did above might
6240 // end up visibly changing the output. Fortunately, in that case, we
6241 // don't need to twiddle bits since the original input will convert
6242 // exactly to double-precision floating-point already. Therefore,
6243 // construct a conditional to use the original value if the top 11
6244 // bits are all sign-bit copies, and use the rounded value computed
6246 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
6247 SINT, DAG.getConstant(53, dl, MVT::i32));
6248 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
6249 Cond, DAG.getConstant(1, dl, MVT::i64));
6250 Cond = DAG.getSetCC(dl, MVT::i32,
6251 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
    }

    ReuseLoadInfo RLI;
    SDValue Bits;

    MachineFunction &MF = DAG.getMachineFunction();
6260 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
6261 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
                         false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
                         RLI.Ranges);
6264 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
6265 } else if (Subtarget.hasLFIWAX() &&
6266 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
6267 MachineMemOperand *MMO =
6268 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
6269 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
6270 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
6271 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
6272 DAG.getVTList(MVT::f64, MVT::Other),
6273 Ops, MVT::i32, MMO);
6274 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
6275 } else if (Subtarget.hasFPCVT() &&
6276 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
6277 MachineMemOperand *MMO =
6278 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
6279 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
6280 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
6281 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
6282 DAG.getVTList(MVT::f64, MVT::Other),
6283 Ops, MVT::i32, MMO);
6284 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
6285 } else if (((Subtarget.hasLFIWAX() &&
6286 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
6287 (Subtarget.hasFPCVT() &&
6288 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
6289 SINT.getOperand(0).getValueType() == MVT::i32) {
6290 MachineFrameInfo *FrameInfo = MF.getFrameInfo();
6291 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
6293 int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
6294 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
6296 SDValue Store =
6297 DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
6298 MachinePointerInfo::getFixedStack(FrameIdx),
6299 false, false, 0);
6301 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
6302 "Expected an i32 store");
6304 RLI.Ptr = FIdx;
6305 RLI.Chain = Store;
6306 RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
6307 RLI.Alignment = 4;
6309 MachineMemOperand *MMO =
6310 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
6311 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
6312 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
6313 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
6314 PPCISD::LFIWZX : PPCISD::LFIWAX,
6315 dl, DAG.getVTList(MVT::f64, MVT::Other),
6316 Ops, MVT::i32, MMO);
6317 } else
6318 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
6320 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
6322 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
6323 FP = DAG.getNode(ISD::FP_ROUND, dl,
6324 MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
6325 return FP;
6326 }
6328 assert(Op.getOperand(0).getValueType() == MVT::i32 &&
6329 "Unhandled INT_TO_FP type in custom expander!");
6330 // Since we only generate this in 64-bit mode, we can take advantage of
6331 // 64-bit registers. In particular, sign extend the input value into the
6332 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack,
6333 // then lfd it and fcfid it.
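// On a 64-bit target the emitted sequence might look like this (register
// assignments illustrative only):
//   extsw r4, r3      ; sign-extend the i32 argument
//   std   r4, -8(r1)  ; spill the full 64-bit value
//   lfd   f0, -8(r1)  ; reload into an FPR
//   fcfid f1, f0      ; convert to double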
6334 MachineFunction &MF = DAG.getMachineFunction();
6335 MachineFrameInfo *FrameInfo = MF.getFrameInfo();
6336 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
6338 SDValue Ld;
6339 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
6340 ReuseLoadInfo RLI;
6341 bool ReusingLoad;
6342 if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
6343 DAG))) {
6344 int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
6345 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
6347 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
6348 MachinePointerInfo::getFixedStack(FrameIdx),
6349 false, false, 0);
6351 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
6352 "Expected an i32 store");
6354 RLI.Ptr = FIdx;
6355 RLI.Chain = Store;
6356 RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
6357 RLI.Alignment = 4;
6358 }
6360 MachineMemOperand *MMO =
6361 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
6362 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
6363 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
6364 Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
6365 PPCISD::LFIWZX : PPCISD::LFIWAX,
6366 dl, DAG.getVTList(MVT::f64, MVT::Other),
6367 Ops, MVT::i32, MMO);
6368 if (ReusingLoad)
6369 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
6370 } else {
6371 assert(Subtarget.isPPC64() &&
6372 "i32->FP without LFIWAX supported only on PPC64");
6374 int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
6375 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
6377 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
6378 Op.getOperand(0));
6380 // STD the extended value into the stack slot.
6381 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
6382 MachinePointerInfo::getFixedStack(FrameIdx),
6383 false, false, 0);
6385 // Load the value as a double.
6386 Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
6387 MachinePointerInfo::getFixedStack(FrameIdx),
6388 false, false, false, 0);
6389 }
6391 // FCFID it and return it.
6392 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
6393 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
6394 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
6395 DAG.getIntPtrConstant(0, dl));
6396 return FP;
6397 }
6399 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
6400 SelectionDAG &DAG) const {
6401 SDLoc dl(Op);
6402 /*
6403 The rounding mode is in bits 30:31 of FPSR, and has the following
6404 settings:
6405 00 Round to nearest
6406 01 Round to 0
6407 10 Round to +inf
6408 11 Round to -inf
6410 FLT_ROUNDS, on the other hand, expects the following:
6411 -1 Undefined
6412 0 Round to 0
6413 1 Round to nearest
6414 2 Round to +inf
6415 3 Round to -inf
6417 To perform the conversion, we do:
6418 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
6419 */
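// Spot-check of the formula above (illustrative): FPSCR 00 -> 0^1 = 1
// (nearest), 01 -> 1^1 = 0 (to zero), 10 -> 2^0 = 2 (+inf), and
// 11 -> 3^0 = 3 (-inf), matching the FLT_ROUNDS encoding.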
6421 MachineFunction &MF = DAG.getMachineFunction();
6422 EVT VT = Op.getValueType();
6423 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
6425 // Save FP Control Word to register
6426 EVT NodeTys[] = {
6427 MVT::f64, // return register
6428 MVT::Glue // unused in this context
6429 };
6430 SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
6432 // Save FP register to stack slot
6433 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
6434 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
6435 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
6436 StackSlot, MachinePointerInfo(), false, false, 0);
6438 // Load FP Control Word from low 32 bits of stack slot.
6439 SDValue Four = DAG.getConstant(4, dl, PtrVT);
6440 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
6441 SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
6442 false, false, false, 0);
6444 // Transform as necessary
6445 SDValue CWD1 =
6446 DAG.getNode(ISD::AND, dl, MVT::i32,
6447 CWD, DAG.getConstant(3, dl, MVT::i32));
6448 SDValue CWD2 =
6449 DAG.getNode(ISD::SRL, dl, MVT::i32,
6450 DAG.getNode(ISD::AND, dl, MVT::i32,
6451 DAG.getNode(ISD::XOR, dl, MVT::i32,
6452 CWD, DAG.getConstant(3, dl, MVT::i32)),
6453 DAG.getConstant(3, dl, MVT::i32)),
6454 DAG.getConstant(1, dl, MVT::i32));
6456 SDValue RetVal =
6457 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
6459 return DAG.getNode((VT.getSizeInBits() < 16 ?
6460 ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
6461 }
6463 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
6464 EVT VT = Op.getValueType();
6465 unsigned BitWidth = VT.getSizeInBits();
6466 SDLoc dl(Op);
6467 assert(Op.getNumOperands() == 3 &&
6468 VT == Op.getOperand(1).getValueType() &&
6469 "Unexpected SHL!");
6471 // Expand into a bunch of logical ops. Note that these ops
6472 // depend on the PPC behavior for oversized shift amounts.
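// Worked example (illustrative): shifting the pair {Hi,Lo} left by
// Amt = 40 with BitWidth = 32: Hi << 40 and Lo >> (32-40) both become 0
// (PPC 32-bit shifts by amounts 32..63 yield 0), while Lo << (40-32)
// survives, so OutHi = Lo << 8 and OutLo = 0, as expected.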
6473 SDValue Lo = Op.getOperand(0);
6474 SDValue Hi = Op.getOperand(1);
6475 SDValue Amt = Op.getOperand(2);
6476 EVT AmtVT = Amt.getValueType();
6478 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
6479 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
6480 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
6481 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
6482 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
6483 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
6484 DAG.getConstant(-BitWidth, dl, AmtVT));
6485 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
6486 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
6487 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
6488 SDValue OutOps[] = { OutLo, OutHi };
6489 return DAG.getMergeValues(OutOps, dl);
6490 }
6492 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
6493 EVT VT = Op.getValueType();
6494 SDLoc dl(Op);
6495 unsigned BitWidth = VT.getSizeInBits();
6496 assert(Op.getNumOperands() == 3 &&
6497 VT == Op.getOperand(1).getValueType() &&
6498 "Unexpected SRL!");
6500 // Expand into a bunch of logical ops. Note that these ops
6501 // depend on the PPC behavior for oversized shift amounts.
6502 SDValue Lo = Op.getOperand(0);
6503 SDValue Hi = Op.getOperand(1);
6504 SDValue Amt = Op.getOperand(2);
6505 EVT AmtVT = Amt.getValueType();
6507 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
6508 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
6509 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
6510 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
6511 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
6512 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
6513 DAG.getConstant(-BitWidth, dl, AmtVT));
6514 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
6515 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
6516 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
6517 SDValue OutOps[] = { OutLo, OutHi };
6518 return DAG.getMergeValues(OutOps, dl);
6519 }
6521 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
6522 SDLoc dl(Op);
6523 EVT VT = Op.getValueType();
6524 unsigned BitWidth = VT.getSizeInBits();
6525 assert(Op.getNumOperands() == 3 &&
6526 VT == Op.getOperand(1).getValueType() &&
6527 "Unexpected SRA!");
6529 // Expand into a bunch of logical ops, followed by a select_cc.
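// The select is needed because for Amt <= BitWidth the OR-combined path
// (Tmp4 below) is correct, while for larger amounts OutLo must instead be
// the sign-filled Hi >> (Amt-BitWidth) (Tmp6); Tmp5 = Amt-BitWidth <= 0
// distinguishes the two cases.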
6530 SDValue Lo = Op.getOperand(0);
6531 SDValue Hi = Op.getOperand(1);
6532 SDValue Amt = Op.getOperand(2);
6533 EVT AmtVT = Amt.getValueType();
6535 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
6536 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
6537 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
6538 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
6539 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
6540 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
6541 DAG.getConstant(-BitWidth, dl, AmtVT));
6542 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
6543 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
6544 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
6545 Tmp4, Tmp6, ISD::SETLE);
6546 SDValue OutOps[] = { OutLo, OutHi };
6547 return DAG.getMergeValues(OutOps, dl);
6548 }
6550 //===----------------------------------------------------------------------===//
6551 // Vector related lowering.
6552 //
6554 /// BuildSplatI - Build a canonical splati of Val with an element size of
6555 /// SplatSize. Cast the result to VT.
6556 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
6557 SelectionDAG &DAG, SDLoc dl) {
6558 assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
6560 static const MVT VTys[] = { // canonical VT to use for each size.
6561 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
6562 };
6564 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
6566 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
6567 if (Val == -1)
6568 SplatSize = 1;
6570 EVT CanonicalVT = VTys[SplatSize-1];
6572 // Build a canonical splat for this value.
6573 SDValue Elt = DAG.getConstant(Val, dl, MVT::i32);
6574 SmallVector<SDValue, 8> Ops;
6575 Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
6576 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops);
6577 return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
6578 }
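// For example, BuildSplatI(-1, 4, MVT::v4i32, DAG, dl) canonicalizes to a
// single 'vspltisb -1' (an all-ones vector bitcast to v4i32), while
// BuildSplatI(5, 2, MVT::Other, DAG, dl) becomes 'vspltish 5'.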
6580 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
6581 /// specified intrinsic ID.
6582 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op,
6583 SelectionDAG &DAG, SDLoc dl,
6584 EVT DestVT = MVT::Other) {
6585 if (DestVT == MVT::Other) DestVT = Op.getValueType();
6586 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
6587 DAG.getConstant(IID, dl, MVT::i32), Op);
6588 }
6590 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
6591 /// specified intrinsic ID.
6592 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
6593 SelectionDAG &DAG, SDLoc dl,
6594 EVT DestVT = MVT::Other) {
6595 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
6596 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
6597 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
6598 }
6600 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
6601 /// specified intrinsic ID.
6602 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
6603 SDValue Op2, SelectionDAG &DAG,
6604 SDLoc dl, EVT DestVT = MVT::Other) {
6605 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
6606 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
6607 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
6608 }
6611 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
6612 /// amount. The result has the specified value type.
6613 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
6614 EVT VT, SelectionDAG &DAG, SDLoc dl) {
6615 // Force LHS/RHS to be the right type.
6616 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
6617 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
6619 int Ops[16];
6620 for (unsigned i = 0; i != 16; ++i)
6621 Ops[i] = i + Amt;
6622 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
6623 return DAG.getNode(ISD::BITCAST, dl, VT, T);
6624 }
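// For example, BuildVSLDOI(A, B, 4, VT, DAG, dl) builds the byte shuffle
// <4,5,...,19>, i.e. bytes 4-15 of A followed by bytes 0-3 of B --
// exactly what 'vsldoi A, B, 4' produces.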
6626 // If this is a case we can't handle, return null and let the default
6627 // expansion code take care of it. If we CAN select this case, and if it
6628 // selects to a single instruction, return Op. Otherwise, if we can codegen
6629 // this case more efficiently than a constant pool load, lower it to the
6630 // sequence of ops that should be used.
6631 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
6632 SelectionDAG &DAG) const {
6633 SDLoc dl(Op);
6634 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
6635 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
6637 if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
6638 // We first build an i32 vector, load it into a QPX register,
6639 // then convert it to a floating-point vector and compare it
6640 // to a zero vector to get the boolean result.
6641 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
6642 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
6643 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
6644 EVT PtrVT = getPointerTy();
6645 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
6647 assert(BVN->getNumOperands() == 4 &&
6648 "BUILD_VECTOR for v4i1 does not have 4 operands");
6650 bool IsConst = true;
6651 for (unsigned i = 0; i < 4; ++i) {
6652 if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue;
6653 if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
6654 IsConst = false;
6655 break;
6656 }
6657 }
6659 if (IsConst) {
6660 Constant *One =
6661 ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
6662 Constant *NegOne =
6663 ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);
6665 SmallVector<Constant*, 4> CV(4, NegOne);
6666 for (unsigned i = 0; i < 4; ++i) {
6667 if (BVN->getOperand(i).getOpcode() == ISD::UNDEF)
6668 CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
6669 else if (cast<ConstantSDNode>(BVN->getOperand(i))->
6670 getConstantIntValue()->isZero())
6671 continue;
6672 else
6673 CV[i] = One;
6674 }
6676 Constant *CP = ConstantVector::get(CV);
6677 SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(),
6678 16 /* alignment */);
6680 SmallVector<SDValue, 2> Ops;
6681 Ops.push_back(DAG.getEntryNode());
6682 Ops.push_back(CPIdx);
6684 SmallVector<EVT, 2> ValueVTs;
6685 ValueVTs.push_back(MVT::v4i1);
6686 ValueVTs.push_back(MVT::Other); // chain
6687 SDVTList VTs = DAG.getVTList(ValueVTs);
6689 return DAG.getMemIntrinsicNode(PPCISD::QVLFSb,
6690 dl, VTs, Ops, MVT::v4f32,
6691 MachinePointerInfo::getConstantPool());
6692 }
6694 SmallVector<SDValue, 4> Stores;
6695 for (unsigned i = 0; i < 4; ++i) {
6696 if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue;
6698 unsigned Offset = 4*i;
6699 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
6700 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
6702 unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
6703 if (StoreSize > 4) {
6704 Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
6705 BVN->getOperand(i), Idx,
6706 PtrInfo.getWithOffset(Offset),
6707 MVT::i32, false, false, 0));
6708 } else {
6709 SDValue StoreValue = BVN->getOperand(i);
6710 if (StoreSize < 4)
6711 StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);
6713 Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
6714 StoreValue, Idx,
6715 PtrInfo.getWithOffset(Offset),
6716 false, false, 0));
6717 }
6718 }
6720 SDValue StoreChain;
6721 if (!Stores.empty())
6722 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
6723 else
6724 StoreChain = DAG.getEntryNode();
6726 // Now load from v4i32 into the QPX register; this will extend it to
6727 // v4i64 but not yet convert it to a floating point. Nevertheless, this
6728 // is typed as v4f64 because the QPX register integer states are not
6729 // explicitly represented.
6731 SmallVector<SDValue, 2> Ops;
6732 Ops.push_back(StoreChain);
6733 Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32));
6734 Ops.push_back(FIdx);
6736 SmallVector<EVT, 2> ValueVTs;
6737 ValueVTs.push_back(MVT::v4f64);
6738 ValueVTs.push_back(MVT::Other); // chain
6739 SDVTList VTs = DAG.getVTList(ValueVTs);
6741 SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
6742 dl, VTs, Ops, MVT::v4i32, PtrInfo);
6743 LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
6744 DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
6745 LoadedVect);
6747 SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::f64);
6748 FPZeros = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
6749 FPZeros, FPZeros, FPZeros, FPZeros);
6751 return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
6752 }
6754 // All other QPX vectors are handled by generic code.
6755 if (Subtarget.hasQPX())
6756 return SDValue();
6758 // Check if this is a splat of a constant value.
6759 APInt APSplatBits, APSplatUndef;
6760 unsigned SplatBitSize;
6761 bool HasAnyUndefs;
6762 if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
6763 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
6764 SplatBitSize > 32)
6765 return SDValue();
6767 unsigned SplatBits = APSplatBits.getZExtValue();
6768 unsigned SplatUndef = APSplatUndef.getZExtValue();
6769 unsigned SplatSize = SplatBitSize / 8;
6771 // First, handle single instruction cases.
6774 if (SplatBits == 0) {
6775 // Canonicalize all zero vectors to be v4i32.
6776 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
6777 SDValue Z = DAG.getConstant(0, dl, MVT::i32);
6778 Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
6779 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
6780 }
6781 return Op;
6782 }
6784 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
6785 int32_t SextVal = (int32_t(SplatBits << (32-SplatBitSize)) >>
6786 (32-SplatBitSize));
6787 if (SextVal >= -16 && SextVal <= 15)
6788 return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
6791 // Two instruction sequences.
6793 // If this value is in the range [-32,30] and is even, use:
6794 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
6795 // If this value is in the range [17,31] and is odd, use:
6796 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
6797 // If this value is in the range [-31,-17] and is odd, use:
6798 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
6799 // Note the last two are three-instruction sequences.
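// For example, splatting 30 into v4i32 can be done as
//   vspltisw vN, 15 ; vadduwm vD, vN, vN
// rather than loading the constant from memory.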
6800 if (SextVal >= -32 && SextVal <= 31) {
6801 // To avoid having these optimizations undone by constant folding,
6802 // we convert to a pseudo that will be expanded later into one of
6803 // the above sequences.
6804 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
6805 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
6806 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
6807 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
6808 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
6809 if (VT == Op.getValueType())
6810 return RetVal;
6811 else
6812 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
6815 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
6816 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
6817 // for fneg/fabs.
6818 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
6819 // Make -1 and vspltisw -1:
6820 SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
6822 // Make the VSLW intrinsic, computing 0x8000_0000.
6823 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
6824 OnesV, DAG, dl);
6826 // xor by OnesV to invert it.
6827 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
6828 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
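// Worked example of the trick above: vspltisw produces 0xFFFFFFFF per
// lane; vslw shifts each lane left by the low 5 bits of the corresponding
// shift-amount lane (31), yielding 0x80000000; the final xor with the
// all-ones vector flips that into 0x7FFFFFFF.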
6831 // Check to see if this is a wide variety of vsplti*, binop self cases.
6832 static const signed char SplatCsts[] = {
6833 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
6834 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
6835 };
6837 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
6838 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
6839 // cases which are ambiguous (e.g. formation of 0x8000_0000), where 'vsplti -1' is preferred.
6840 int i = SplatCsts[idx];
6842 // Figure out what shift amount will be used by altivec if shifted by i in
6843 // this splat size.
6844 unsigned TypeShiftAmt = i & (SplatBitSize-1);
6846 // vsplti + shl self.
6847 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
6848 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
6849 static const unsigned IIDs[] = { // Intrinsic to use for each size.
6850 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
6851 Intrinsic::ppc_altivec_vslw
6852 };
6853 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
6854 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
6855 }
6857 // vsplti + srl self.
6858 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
6859 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
6860 static const unsigned IIDs[] = { // Intrinsic to use for each size.
6861 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
6862 Intrinsic::ppc_altivec_vsrw
6863 };
6864 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
6865 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
6868 // vsplti + sra self.
6869 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
6870 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
6871 static const unsigned IIDs[] = { // Intrinsic to use for each size.
6872 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
6873 Intrinsic::ppc_altivec_vsraw
6874 };
6875 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
6876 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
6877 }
6879 // vsplti + rol self.
6880 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
6881 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
6882 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
6883 static const unsigned IIDs[] = { // Intrinsic to use for each size.
6884 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
6885 Intrinsic::ppc_altivec_vrlw
6886 };
6887 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
6888 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
6891 // t = vsplti c, result = vsldoi t, t, 1
6892 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
6893 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
6894 return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
6895 }
6896 // t = vsplti c, result = vsldoi t, t, 2
6897 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
6898 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
6899 return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
6900 }
6901 // t = vsplti c, result = vsldoi t, t, 3
6902 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
6903 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
6904 return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
6905 }
6906 }
6908 return SDValue();
6909 }
6911 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
6912 /// the specified operations to build the shuffle.
6913 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
6914 SDValue RHS, SelectionDAG &DAG,
6915 SDLoc dl) {
6916 unsigned OpNum = (PFEntry >> 26) & 0x0F;
6917 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
6918 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
6920 enum {
6921 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
6922 OP_VMRGHW,
6923 OP_VMRGLW,
6924 OP_VSPLTISW0,
6925 OP_VSPLTISW1,
6926 OP_VSPLTISW2,
6927 OP_VSPLTISW3,
6928 OP_VSLDOI4,
6929 OP_VSLDOI8,
6930 OP_VSLDOI12
6931 };
6933 if (OpNum == OP_COPY) {
6934 if (LHSID == (1*9+2)*9+3) return LHS;
6935 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
6936 return RHS;
6937 }
6939 SDValue OpLHS, OpRHS;
6940 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
6941 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
6943 int ShufIdxs[16];
6944 switch (OpNum) {
6945 default: llvm_unreachable("Unknown i32 permute!");
6946 case OP_VMRGHW:
6947 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
6948 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
6949 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
6950 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
6951 break;
6952 case OP_VMRGLW:
6953 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
6954 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
6955 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
6956 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
6957 break;
6958 case OP_VSPLTISW0:
6959 for (unsigned i = 0; i != 16; ++i)
6960 ShufIdxs[i] = (i&3)+0;
6961 break;
6962 case OP_VSPLTISW1:
6963 for (unsigned i = 0; i != 16; ++i)
6964 ShufIdxs[i] = (i&3)+4;
6965 break;
6966 case OP_VSPLTISW2:
6967 for (unsigned i = 0; i != 16; ++i)
6968 ShufIdxs[i] = (i&3)+8;
6969 break;
6970 case OP_VSPLTISW3:
6971 for (unsigned i = 0; i != 16; ++i)
6972 ShufIdxs[i] = (i&3)+12;
6973 break;
6974 case OP_VSLDOI4:
6975 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
6976 case OP_VSLDOI8:
6977 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
6978 case OP_VSLDOI12:
6979 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
6980 }
6981 EVT VT = OpLHS.getValueType();
6982 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
6983 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
6984 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
6985 return DAG.getNode(ISD::BITCAST, dl, VT, T);
6986 }
6988 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
6989 /// is a shuffle we can handle in a single instruction, return it. Otherwise,
6990 /// return the code it can be lowered into. Worst case, it can always be
6991 /// lowered into a vperm.
6992 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
6993 SelectionDAG &DAG) const {
6994 SDLoc dl(Op);
6995 SDValue V1 = Op.getOperand(0);
6996 SDValue V2 = Op.getOperand(1);
6997 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
6998 EVT VT = Op.getValueType();
6999 bool isLittleEndian = Subtarget.isLittleEndian();
7001 if (Subtarget.hasQPX()) {
7002 if (VT.getVectorNumElements() != 4)
7003 return SDValue();
7005 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
7007 int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
7008 if (AlignIdx != -1) {
7009 return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
7010 DAG.getConstant(AlignIdx, dl, MVT::i32));
7011 } else if (SVOp->isSplat()) {
7012 int SplatIdx = SVOp->getSplatIndex();
7013 if (SplatIdx >= 4) {
7014 std::swap(V1, V2);
7015 SplatIdx -= 4;
7016 }
7018 // FIXME: If SplatIdx == 0 and the input came from a load, then there is
7019 // nothing to do.
7021 return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
7022 DAG.getConstant(SplatIdx, dl, MVT::i32));
7023 }
7025 // Lower this into a qvgpci/qvfperm pair.
7027 // Compute the qvgpci literal
7028 unsigned idx = 0;
7029 for (unsigned i = 0; i < 4; ++i) {
7030 int m = SVOp->getMaskElt(i);
7031 unsigned mm = m >= 0 ? (unsigned) m : i;
7032 idx |= mm << (3-i)*3;
7033 }
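// For example, the identity mask <0,1,2,3> packs to the literal
// 0*512 + 1*64 + 2*8 + 3 = 83, each element index occupying 3 bits.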
7035 SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
7036 DAG.getConstant(idx, dl, MVT::i32));
7037 return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
7038 }
7040 // Cases that are handled by instructions that take permute immediates
7041 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
7042 // selected by the instruction selector.
7043 if (V2.getOpcode() == ISD::UNDEF) {
7044 if (PPC::isSplatShuffleMask(SVOp, 1) ||
7045 PPC::isSplatShuffleMask(SVOp, 2) ||
7046 PPC::isSplatShuffleMask(SVOp, 4) ||
7047 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
7048 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
7049 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
7050 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
7051 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
7052 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
7053 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
7054 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
7055 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
7056 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) {
7057 return Op;
7058 }
7059 }
7061 // Altivec has a variety of "shuffle immediates" that take two vector inputs
7062 // and produce a fixed permutation. If any of these match, do not lower to
7063 // VPERM.
7064 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
7065 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
7066 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
7067 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
7068 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
7069 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
7070 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
7071 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
7072 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
7073 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
7074 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG))
7075 return Op;
7077 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
7078 // perfect shuffle table to emit an optimal matching sequence.
7079 ArrayRef<int> PermMask = SVOp->getMask();
7081 unsigned PFIndexes[4];
7082 bool isFourElementShuffle = true;
7083 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
7084 unsigned EltNo = 8; // Start out undef.
7085 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
7086 if (PermMask[i*4+j] < 0)
7087 continue; // Undef, ignore it.
7089 unsigned ByteSource = PermMask[i*4+j];
7090 if ((ByteSource & 3) != j) {
7091 isFourElementShuffle = false;
7092 break;
7093 }
7095 if (EltNo == 8) {
7096 EltNo = ByteSource/4;
7097 } else if (EltNo != ByteSource/4) {
7098 isFourElementShuffle = false;
7099 break;
7100 }
7101 }
7102 PFIndexes[i] = EltNo;
7103 }
7105 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
7106 // perfect shuffle vector to determine if it is cost effective to do this as
7107 // discrete instructions, or whether we should use a vperm.
7108 // For now, we skip this for little endian until such time as we have a
7109 // little-endian perfect shuffle table.
7110 if (isFourElementShuffle && !isLittleEndian) {
7111 // Compute the index in the perfect shuffle table.
7112 unsigned PFTableIndex =
7113 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
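// Each PFIndexes[i] is in [0,8] (source element 0-7, or 8 for undef), so
// the four indices pack into a base-9 number; e.g. the identity shuffle
// <0,1,2,3> maps to index ((0*9+1)*9+2)*9+3 = 102.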
7115 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
7116 unsigned Cost = (PFEntry >> 30);
7118 // Determining when to avoid vperm is tricky. Many things affect the cost
7119 // of vperm, particularly how many times the perm mask needs to be computed.
7120 // For example, if the perm mask can be hoisted out of a loop or is already
7121 // used (perhaps because there are multiple permutes with the same shuffle
7122 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
7123 // the loop requires an extra register.
7125 // As a compromise, we only emit discrete instructions if the shuffle can be
7126 // generated in 3 or fewer operations. When we have loop information
7127 // available, if this block is within a loop, we should avoid using vperm
7128 // for 3-operation perms and use a constant pool load instead.
7129 if (Cost < 3)
7130 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
7131 }
7133 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
7134 // vector that will get spilled to the constant pool.
7135 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
7137 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
7138 // that it is in input element units, not in bytes. Convert now.
7140 // For little endian, the order of the input vectors is reversed, and
7141 // the permutation mask is complemented with respect to 31. This is
7142 // necessary to produce proper semantics with the big-endian-biased vperm
7143 // instruction.
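// For example, with 4-byte elements, mask element PermMask[i] = 5 selects
// source bytes 20..23 on big-endian; on little-endian the same element
// contributes the complemented byte indices 31-20 .. 31-23 of the swapped
// operand pair.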
7144 EVT EltVT = V1.getValueType().getVectorElementType();
7145 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
7147 SmallVector<SDValue, 16> ResultMask;
7148 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
7149 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
7151 for (unsigned j = 0; j != BytesPerElement; ++j)
7152 if (isLittleEndian)
7153 ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
7154 dl, MVT::i32));
7155 else
7156 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
7157 MVT::i32));
7158 }
7160 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
7161 ResultMask);
7162 if (isLittleEndian)
7163 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
7164 V2, V1, VPermMask);
7165 else
7166 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
7167 V1, V2, VPermMask);
7168 }
7170 /// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
7171 /// altivec comparison. If it is, return true and fill in Opc/isDot with
7172 /// information about the intrinsic.
7173 static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
7174 bool &isDot, const PPCSubtarget &Subtarget) {
7175 unsigned IntrinsicID =
7176 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
7179 switch (IntrinsicID) {
7180 default: return false;
7181 // Comparison predicates.
7182 case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;
7183 case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
7184 case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
7185 case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
7186 case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
7187 case Intrinsic::ppc_altivec_vcmpequd_p:
7188 if (Subtarget.hasP8Altivec()) {
7189 CompareOpc = 199;
7190 isDot = 1;
7191 } else
7192 return false;
7194 break;
7196 case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
7197 case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
7198 case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
7199 case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
7200 case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
7201 case Intrinsic::ppc_altivec_vcmpgtsd_p:
7202 if (Subtarget.hasP8Altivec()) {
7203 CompareOpc = 967;
7204 isDot = 1;
7205 } else
7206 return false;
7208 break;
7210 case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
7211 case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
7212 case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
7213 case Intrinsic::ppc_altivec_vcmpgtud_p:
7214 if (Subtarget.hasP8Altivec()) {
7215 CompareOpc = 711;
7216 isDot = 1;
7217 } else
7218 return false;
7220 break;
7223 // Normal Comparisons.
7224 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
7225 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
7226 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
7227 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
7228 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
7229 case Intrinsic::ppc_altivec_vcmpequd:
7230 if (Subtarget.hasP8Altivec()) {
7231 CompareOpc = 199;
7232 isDot = 0;
7233 } else
7234 return false;
7236 break;
7238 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
7239 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
7240 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
7241 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
7242 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
7243 case Intrinsic::ppc_altivec_vcmpgtsd:
7244 if (Subtarget.hasP8Altivec()) {
7245 CompareOpc = 967;
7246 isDot = 0;
7247 } else
7248 return false;
7250 break;
7252 case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
7253 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
7254 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
7255 case Intrinsic::ppc_altivec_vcmpgtud:
7256 if (Subtarget.hasP8Altivec()) {
7257 CompareOpc = 711;
7258 isDot = 0;
7259 } else
7260 return false;
7262 break;
7263 }
7264 return true;
7265 }
7268 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
7269 /// lower, do it, otherwise return null.
7270 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
7271 SelectionDAG &DAG) const {
7272 // If this is a lowered altivec predicate compare, CompareOpc is set to the
7273 // opcode number of the comparison.
7274 int CompareOpc;
7275 bool isDot;
7276 SDLoc dl(Op);
7277 if (!getAltivecCompareInfo(Op, CompareOpc, isDot, Subtarget))
7278 return SDValue(); // Don't custom lower most intrinsics.
7280 // If this is a non-dot comparison, make the VCMP node and we are done.
7281 if (!isDot) {
7282 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
7283 Op.getOperand(1), Op.getOperand(2),
7284 DAG.getConstant(CompareOpc, dl, MVT::i32));
7285 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
7286 }
7288 // Create the PPCISD altivec 'dot' comparison node.
7289 SDValue Ops[] = {
7290 Op.getOperand(2), // LHS
7291 Op.getOperand(3), // RHS
7292 DAG.getConstant(CompareOpc, dl, MVT::i32)
7293 };
7294 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
7295 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
7297 // Now that we have the comparison, emit a copy from the CR to a GPR.
7298 // This is flagged to the above dot comparison.
7299 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
7300 DAG.getRegister(PPC::CR6, MVT::i32),
7301 CompNode.getValue(1));
7303 // Unpack the result based on how the target uses it.
7304 unsigned BitNo; // Bit # of CR6.
7305 bool InvertBit; // Invert result?
7306 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
7307 default: // Can't happen, don't crash on invalid number though.
7308 case 0: // Return the value of the EQ bit of CR6.
7309 BitNo = 0; InvertBit = false;
7310 break;
7311 case 1: // Return the inverted value of the EQ bit of CR6.
7312 BitNo = 0; InvertBit = true;
7313 break;
7314 case 2: // Return the value of the LT bit of CR6.
7315 BitNo = 2; InvertBit = false;
7316 break;
7317 case 3: // Return the inverted value of the LT bit of CR6.
7318 BitNo = 2; InvertBit = true;
7319 break;
7320 }
7322 // Shift the bit into the low position.
7323 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
7324 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
7326 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
7327 DAG.getConstant(1, dl, MVT::i32));
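// Sanity check of the shift amount (illustrative): within the 32-bit CR
// value, the CR6 field occupies bits 7..4, with EQ at bit 5 and LT at
// bit 7; the expression 8 - (3 - BitNo) gives 5 for BitNo == 0 (EQ) and
// 7 for BitNo == 2 (LT), as required.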
7329 // If we are supposed to, toggle the bit.
7330 if (InvertBit)
7331 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
7332 DAG.getConstant(1, dl, MVT::i32));
7334 return Flags;
7335 }
7336 SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
7337 SelectionDAG &DAG) const {
7338 SDLoc dl(Op);
7339 // For v2i64 (VSX), we can pattern match the v2i32 case (using fp <-> int
7340 // instructions), but for smaller types, we need to first extend up to v2i32
7341 // before going any farther.
7342 if (Op.getValueType() == MVT::v2i64) {
7343 EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
7344 if (ExtVT != MVT::v2i32) {
7345 Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
7346 Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
7347 DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
7348 ExtVT.getVectorElementType(), 4)));
7349 Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
7350 Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
7351 DAG.getValueType(MVT::v2i32));
7352 }
7354 return Op;
7355 }
7357 return SDValue();
7358 }
7360 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
7361 SelectionDAG &DAG) const {
7362 SDLoc dl(Op);
7363 // Create a stack slot that is 16-byte aligned.
7364 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
7365 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
7366 EVT PtrVT = getPointerTy();
7367 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7369 // Store the input value into Value#0 of the stack slot.
7370 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
7371 Op.getOperand(0), FIdx, MachinePointerInfo(),
7372 false, false, 0);
7374 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
7375 false, false, false, 0);
7376 }
7378 SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
7379 SelectionDAG &DAG) const {
7380 SDLoc dl(Op);
7381 SDNode *N = Op.getNode();
7383 assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
7384 "Unknown extract_vector_elt type");
7386 SDValue Value = N->getOperand(0);
7388 // The first part of this is like the store lowering except that we don't
7389 // need to track the chain.
7391 // The values are now known to be -1 (false) or 1 (true). To convert this
7392 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
7393 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
7394 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
7396 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
7397 // understand how to form the extending load.
7398 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64);
7399 FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
7400 FPHalfs, FPHalfs, FPHalfs, FPHalfs);
7402 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
7404 // Now convert to an integer and store.
7405 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
7406 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
7407 Value);
7409 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
7410 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
7411 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
7412 EVT PtrVT = getPointerTy();
7413 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7415 SDValue StoreChain = DAG.getEntryNode();
7416 SmallVector<SDValue, 2> Ops;
7417 Ops.push_back(StoreChain);
7418 Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32));
7419 Ops.push_back(Value);
7420 Ops.push_back(FIdx);
7422 SmallVector<EVT, 2> ValueVTs;
7423 ValueVTs.push_back(MVT::Other); // chain
7424 SDVTList VTs = DAG.getVTList(ValueVTs);
7426 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
7427 dl, VTs, Ops, MVT::v4i32, PtrInfo);
7429 // Extract the value requested.
7430 unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
7431 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
7432 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
7434 SDValue IntVal = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
7435 PtrInfo.getWithOffset(Offset),
7436 false, false, false, 0);
7438 if (!Subtarget.useCRBits())
7439 return IntVal;
7441 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
7442 }
7444 /// Lowering for QPX v4i1 loads
7445 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
7446 SelectionDAG &DAG) const {
7447 SDLoc dl(Op);
7448 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
7449 SDValue LoadChain = LN->getChain();
7450 SDValue BasePtr = LN->getBasePtr();
7452 if (Op.getValueType() == MVT::v4f64 ||
7453 Op.getValueType() == MVT::v4f32) {
7454 EVT MemVT = LN->getMemoryVT();
7455 unsigned Alignment = LN->getAlignment();
7457 // If this load is properly aligned, then it is legal.
7458 if (Alignment >= MemVT.getStoreSize())
7459 return Op;
7461 EVT ScalarVT = Op.getValueType().getScalarType(),
7462 ScalarMemVT = MemVT.getScalarType();
7463 unsigned Stride = ScalarMemVT.getStoreSize();
7465 SmallVector<SDValue, 8> Vals, LoadChains;
7466 for (unsigned Idx = 0; Idx < 4; ++Idx) {
7467 SDValue Load;
7468 if (ScalarVT != ScalarMemVT)
7469 Load =
7470 DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
7471 BasePtr,
7472 LN->getPointerInfo().getWithOffset(Idx*Stride),
7473 ScalarMemVT, LN->isVolatile(), LN->isNonTemporal(),
7474 LN->isInvariant(), MinAlign(Alignment, Idx*Stride),
7475 LN->getAAInfo());
7476 else
7477 Load =
7478 DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
7479 LN->getPointerInfo().getWithOffset(Idx*Stride),
7480 LN->isVolatile(), LN->isNonTemporal(),
7481 LN->isInvariant(), MinAlign(Alignment, Idx*Stride),
7482 LN->getAAInfo());
7484 if (Idx == 0 && LN->isIndexed()) {
7485 assert(LN->getAddressingMode() == ISD::PRE_INC &&
7486 "Unknown addressing mode on vector load");
7487 Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
7488 LN->getAddressingMode());
7489 }
7491 Vals.push_back(Load);
7492 LoadChains.push_back(Load.getValue(1));
7494 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
7495 DAG.getConstant(Stride, dl,
7496 BasePtr.getValueType()));
7497 }
7499 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
7500 SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
7501 Op.getValueType(), Vals);
7503 if (LN->isIndexed()) {
7504 SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
7505 return DAG.getMergeValues(RetOps, dl);
7506 }
7508 SDValue RetOps[] = { Value, TF };
7509 return DAG.getMergeValues(RetOps, dl);
7510 }
7512 assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
7513 assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");
7515 // To lower v4i1 from a byte array, we load the byte elements of the
7516 // vector and then reuse the BUILD_VECTOR logic.
7518 SmallVector<SDValue, 4> VectElmts, VectElmtChains;
7519 for (unsigned i = 0; i < 4; ++i) {
7520 SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
7521 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
7523 VectElmts.push_back(DAG.getExtLoad(ISD::EXTLOAD,
7524 dl, MVT::i32, LoadChain, Idx,
7525 LN->getPointerInfo().getWithOffset(i),
7526 MVT::i8 /* memory type */,
7527 LN->isVolatile(), LN->isNonTemporal(),
7528 LN->isInvariant(),
7529 1 /* alignment */, LN->getAAInfo()));
7530 VectElmtChains.push_back(VectElmts[i].getValue(1));
7531 }
7533 LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
7534 SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i1, VectElmts);
7536 SDValue RVals[] = { Value, LoadChain };
7537 return DAG.getMergeValues(RVals, dl);
7538 }
7540 /// Lowering for QPX v4i1 stores
7541 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
7542 SelectionDAG &DAG) const {
7543 SDLoc dl(Op);
7544 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
7545 SDValue StoreChain = SN->getChain();
7546 SDValue BasePtr = SN->getBasePtr();
7547 SDValue Value = SN->getValue();
7549 if (Value.getValueType() == MVT::v4f64 ||
7550 Value.getValueType() == MVT::v4f32) {
7551 EVT MemVT = SN->getMemoryVT();
7552 unsigned Alignment = SN->getAlignment();
7554 // If this store is properly aligned, then it is legal.
7555 if (Alignment >= MemVT.getStoreSize())
7556 return Op;
7558 EVT ScalarVT = Value.getValueType().getScalarType(),
7559 ScalarMemVT = MemVT.getScalarType();
7560 unsigned Stride = ScalarMemVT.getStoreSize();
7562 SmallVector<SDValue, 8> Stores;
7563 for (unsigned Idx = 0; Idx < 4; ++Idx) {
7564 SDValue Ex =
7565 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
7566 DAG.getConstant(Idx, dl, getVectorIdxTy()));
7567 SDValue Store;
7568 if (ScalarVT != ScalarMemVT)
7569 Store =
7570 DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
7571 SN->getPointerInfo().getWithOffset(Idx*Stride),
7572 ScalarMemVT, SN->isVolatile(), SN->isNonTemporal(),
7573 MinAlign(Alignment, Idx*Stride), SN->getAAInfo());
7574 else
7575 Store =
7576 DAG.getStore(StoreChain, dl, Ex, BasePtr,
7577 SN->getPointerInfo().getWithOffset(Idx*Stride),
7578 SN->isVolatile(), SN->isNonTemporal(),
7579 MinAlign(Alignment, Idx*Stride), SN->getAAInfo());
7581 if (Idx == 0 && SN->isIndexed()) {
7582 assert(SN->getAddressingMode() == ISD::PRE_INC &&
7583 "Unknown addressing mode on vector store");
7584 Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
7585 SN->getAddressingMode());
7586 }
7588 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
7589 DAG.getConstant(Stride, dl,
7590 BasePtr.getValueType()));
7591 Stores.push_back(Store);
7592 }
7594 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
7596 if (SN->isIndexed()) {
7597 SDValue RetOps[] = { TF, Stores[0].getValue(1) };
7598 return DAG.getMergeValues(RetOps, dl);
7599 }
7601 return TF;
7602 }
7604 assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
7605 assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");
7607 // The values are now known to be -1 (false) or 1 (true). To convert this
7608 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
7609 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
7610 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
7612 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
7613 // understand how to form the extending load.
7614 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64);
7615 FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
7616 FPHalfs, FPHalfs, FPHalfs, FPHalfs);
7618 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
7620 // Now convert to an integer and store.
7621 Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
7622 DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
7623 Value);
7625 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
7626 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
7627 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
7628 EVT PtrVT = getPointerTy();
7629 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
7631 SmallVector<SDValue, 2> Ops;
7632 Ops.push_back(StoreChain);
7633 Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32));
7634 Ops.push_back(Value);
7635 Ops.push_back(FIdx);
7637 SmallVector<EVT, 2> ValueVTs;
7638 ValueVTs.push_back(MVT::Other); // chain
7639 SDVTList VTs = DAG.getVTList(ValueVTs);
7641 StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
7642 dl, VTs, Ops, MVT::v4i32, PtrInfo);
7644 // Move data into the byte array.
7645 SmallVector<SDValue, 4> Loads, LoadChains;
7646 for (unsigned i = 0; i < 4; ++i) {
7647 unsigned Offset = 4*i;
7648 SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
7649 Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);
7651 Loads.push_back(DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
7652 PtrInfo.getWithOffset(Offset),
7653 false, false, false, 0));
7654 LoadChains.push_back(Loads[i].getValue(1));
7655 }
7657 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
7659 SmallVector<SDValue, 4> Stores;
7660 for (unsigned i = 0; i < 4; ++i) {
7661 SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
7662 Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);
7664 Stores.push_back(DAG.getTruncStore(StoreChain, dl, Loads[i], Idx,
7665 SN->getPointerInfo().getWithOffset(i),
7666 MVT::i8 /* memory type */,
7667 SN->isNonTemporal(), SN->isVolatile(),
7668 1 /* alignment */, SN->getAAInfo()));
7669 }
7671 StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
7673 return StoreChain;
7674 }
7676 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
7677 SDLoc dl(Op);
7678 if (Op.getValueType() == MVT::v4i32) {
7679 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7681 SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl);
7682 SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
7684 SDValue RHSSwap = // = vrlw RHS, 16
7685 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
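// In other words (illustrative): writing each 32-bit lane as
// x = xh*2^16 + xl and y = yh*2^16 + yl, then
//   x*y mod 2^32 = xl*yl + ((xh*yl + xl*yh) << 16);
// vmulouh below computes the xl*yl terms, and vmsumuhm against the
// half-swapped RHS computes xh*yl + xl*yh per lane.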
7687 // Shrinkify inputs to v8i16.
7688 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
7689 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
7690 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
7692 // Low parts multiplied together, generating 32-bit results (we ignore the
7693 // top parts).
7694 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
7695 LHS, RHS, DAG, dl, MVT::v4i32);
7697 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
7698 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
7699 // Shift the high parts up 16 bits.
7700 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
7701 Neg16, DAG, dl);
7702 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
7703 } else if (Op.getValueType() == MVT::v8i16) {
7704 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7706 SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
7708 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
7709 LHS, RHS, Zero, DAG, dl);
7710 } else if (Op.getValueType() == MVT::v16i8) {
7711 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7712 bool isLittleEndian = Subtarget.isLittleEndian();
7714 // Multiply the even 8-bit parts, producing 16-bit sums.
7715 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
7716 LHS, RHS, DAG, dl, MVT::v8i16);
7717 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
7719 // Multiply the odd 8-bit parts, producing 16-bit sums.
7720 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
7721 LHS, RHS, DAG, dl, MVT::v8i16);
7722 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
7724 // Merge the results together. Because vmuleub and vmuloub are
7725 // instructions with a big-endian bias, we must reverse the
7726 // element numbering and reverse the meaning of "odd" and "even"
7727 // when generating little endian code.
7728 int Ops[16];
7729 for (unsigned i = 0; i != 8; ++i) {
7730 if (isLittleEndian) {
7731 Ops[i*2  ] = 2*i;
7732 Ops[i*2+1] = 2*i+16;
7733 } else {
7734 Ops[i*2  ] = 2*i+1;
7735 Ops[i*2+1] = 2*i+1+16;
7736 }
7737 }
7738 if (isLittleEndian)
7739 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
7740 else
7741 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
7742 }
7743 llvm_unreachable("Unknown mul to lower!");
7744 }
7747 /// LowerOperation - Provide custom lowering hooks for some operations.
7749 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
7750 switch (Op.getOpcode()) {
7751 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
7752 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
7753 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
7754 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
7755 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
7756 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
7757 case ISD::SETCC: return LowerSETCC(Op, DAG);
7758 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
7759 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
7760 case ISD::VASTART:
7761 return LowerVASTART(Op, DAG, Subtarget);
7763 case ISD::VAARG:
7764 return LowerVAARG(Op, DAG, Subtarget);
7766 case ISD::VACOPY:
7767 return LowerVACOPY(Op, DAG, Subtarget);
7769 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, Subtarget);
7770 case ISD::DYNAMIC_STACKALLOC:
7771 return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget);
7773 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
7774 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
7776 case ISD::LOAD: return LowerLOAD(Op, DAG);
7777 case ISD::STORE: return LowerSTORE(Op, DAG);
7778 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
7779 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
7780 case ISD::FP_TO_UINT:
7781 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
7782 SDLoc(Op));
7783 case ISD::UINT_TO_FP:
7784 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
7785 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
7787 // Lower 64-bit shifts.
7788 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
7789 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
7790 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
7792 // Vector-related lowering.
7793 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
7794 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
7795 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
7796 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
7797 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
7798 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
7799 case ISD::MUL: return LowerMUL(Op, DAG);
7801 // For counter-based loop handling.
7802 case ISD::INTRINSIC_W_CHAIN: return SDValue();
7804 // Frame & Return address.
7805 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
7806 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
7807 }
7808 }
7810 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
7811 SmallVectorImpl<SDValue>&Results,
7812 SelectionDAG &DAG) const {
7813 SDLoc dl(N);
7814 switch (N->getOpcode()) {
7815 default:
7816 llvm_unreachable("Do not know how to custom type legalize this operation!");
7817 case ISD::READCYCLECOUNTER: {
7818 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
7819 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
7821 Results.push_back(RTB);
7822 Results.push_back(RTB.getValue(1));
7823 Results.push_back(RTB.getValue(2));
7824 break;
7825 }
7826 case ISD::INTRINSIC_W_CHAIN: {
7827 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
7828 Intrinsic::ppc_is_decremented_ctr_nonzero)
7829 break;
7831 assert(N->getValueType(0) == MVT::i1 &&
7832 "Unexpected result type for CTR decrement intrinsic");
7833 EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0));
7834 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
7835 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
7836 N->getOperand(1));
7838 Results.push_back(NewInt);
7839 Results.push_back(NewInt.getValue(1));
7840 break;
7841 }
7842 case ISD::VAARG: {
7843 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
7844 return;
7846 EVT VT = N->getValueType(0);
7848 if (VT == MVT::i64) {
7849 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, Subtarget);
7851 Results.push_back(NewNode);
7852 Results.push_back(NewNode.getValue(1));
7853 }
7854 return;
7855 }
7856 case ISD::FP_ROUND_INREG: {
7857 assert(N->getValueType(0) == MVT::ppcf128);
7858 assert(N->getOperand(0).getValueType() == MVT::ppcf128);
7859 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
7860 MVT::f64, N->getOperand(0),
7861 DAG.getIntPtrConstant(0, dl));
7862 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
7863 MVT::f64, N->getOperand(0),
7864 DAG.getIntPtrConstant(1, dl));
7866 // Add the two halves of the long double in round-to-zero mode.
7867 SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
7869 // We know the low half is about to be thrown away, so just use something
7870 // convenient.
7871 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
7872 FPreg, FPreg));
7873 return;
7874 }
7875 case ISD::FP_TO_SINT:
7876 case ISD::FP_TO_UINT:
7877 // LowerFP_TO_INT() can only handle f32 and f64.
7878 if (N->getOperand(0).getValueType() == MVT::ppcf128)
7879 return;
7880 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
7881 return;
7882 }
7883 }
7886 //===----------------------------------------------------------------------===//
7887 // Other Lowering Code
7888 //===----------------------------------------------------------------------===//
7890 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
7891 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
7892 Function *Func = Intrinsic::getDeclaration(M, Id);
7893 return Builder.CreateCall(Func, {});
7894 }
7896 // The mappings for emitLeadingFence/emitTrailingFence are taken from
7897 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
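// In summary (mirroring the code below): seq_cst gets a leading 'sync';
// release (and stronger) gets a leading 'lwsync'; acquire (and stronger)
// loads get a trailing 'lwsync'; weaker orderings need no fence.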
7898 Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
7899 AtomicOrdering Ord, bool IsStore,
7900 bool IsLoad) const {
7901 if (Ord == SequentiallyConsistent)
7902 return callIntrinsic(Builder, Intrinsic::ppc_sync);
7903 if (isAtLeastRelease(Ord))
7904 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
7905 return nullptr;
7906 }
Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                          AtomicOrdering Ord, bool IsStore,
                                          bool IsLoad) const {
  if (IsLoad && isAtLeastAcquire(Ord))
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  // FIXME: this is too conservative, a dependent branch + isync is enough.
  // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
  // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
  // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
  return nullptr;
}
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
                                    unsigned AtomicSize,
                                    unsigned BinOpcode) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  auto LoadMnemonic = PPC::LDARX;
  auto StoreMnemonic = PPC::STDCX;
  switch (AtomicSize) {
  default:
    llvm_unreachable("Unexpected size of atomic entity");
  case 1:
    LoadMnemonic = PPC::LBARX;
    StoreMnemonic = PPC::STBCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 2:
    LoadMnemonic = PPC::LHARX;
    StoreMnemonic = PPC::STHCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 4:
    LoadMnemonic = PPC::LWARX;
    StoreMnemonic = PPC::STWCX;
    break;
  case 8:
    LoadMnemonic = PPC::LDARX;
    StoreMnemonic = PPC::STDCX;
    break;
  }

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = BB;
  ++It;

  unsigned dest = MI->getOperand(0).getReg();
  unsigned ptrA = MI->getOperand(1).getReg();
  unsigned ptrB = MI->getOperand(2).getReg();
  unsigned incr = MI->getOperand(3).getReg();
  DebugLoc dl = MI->getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  unsigned TmpReg = (!BinOpcode) ? incr :
    RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
                                                   : &PPC::GPRCRegClass);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   add r0, dest, incr
  //   st[wd]cx. r0, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  BB = loopMBB;
  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
    .addReg(ptrA).addReg(ptrB);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
  BuildMI(BB, dl, TII->get(StoreMnemonic))
    .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  return BB;
}
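// A note on the loop emitted above: l[wd]arx acquires a reservation on the
// address, and the paired st[wd]cx. succeeds only if that reservation is
// still held, recording the outcome in CR0; the bne- (PRED_NE on CR0)
// retries the whole read-modify-write sequence when the store-conditional
// fails.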
MachineBasicBlock *
PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
                                            MachineBasicBlock *BB,
                                            bool is8bit, // operation
                                            unsigned BinOpcode) const {
  // If we support part-word atomic mnemonics, just use them
  if (Subtarget.hasPartwordAtomics())
    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode);

  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  // In 64 bit mode we have to use 64 bits for addresses, even though the
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  bool is64bit = Subtarget.isPPC64();
  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = BB;
  ++It;

  unsigned dest = MI->getOperand(0).getReg();
  unsigned ptrA = MI->getOperand(1).getReg();
  unsigned ptrB = MI->getOperand(2).getReg();
  unsigned incr = MI->getOperand(3).getReg();
  DebugLoc dl = MI->getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
                                          : &PPC::GPRCRegClass;
  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
  unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
  unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
  unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
  unsigned MaskReg = RegInfo.createVirtualRegister(RC);
  unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
  unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
  unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
  unsigned Ptr1Reg;
  unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]
  //   rlwinm ptr, ptr1, 0, 0, 29
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   add tmp, tmpDest, incr2
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw dest, tmpDest, shift
  if (ptrA != ZeroReg) {
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
      .addReg(ptrA).addReg(ptrB);
  } else {
    Ptr1Reg = ptrB;
  }
  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
      .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
      .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
  if (is64bit)
    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
        .addReg(Ptr1Reg).addImm(0).addImm(61);
  else
    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
        .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
      .addReg(incr).addReg(ShiftReg);
  if (is8bit)
    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
  else {
    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
    BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
  }
  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
      .addReg(Mask2Reg).addReg(ShiftReg);

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
    .addReg(ZeroReg).addReg(PtrReg);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
      .addReg(Incr2Reg).addReg(TmpDestReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
    .addReg(TmpDestReg).addReg(MaskReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
    .addReg(TmpReg).addReg(MaskReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
    .addReg(Tmp3Reg).addReg(Tmp2Reg);
  BuildMI(BB, dl, TII->get(PPC::STWCX))
    .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
    .addReg(ShiftReg);
  return BB;
}
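// Worked example of the shift bookkeeping above (big-endian, is8bit): for a
// byte at offset 1 within its aligned word, the rlwinm yields
// shift1 = (ptr & 3) * 8 = 8, and the xori by 24 flips it to shift = 16,
// the bit position of byte 1 in a big-endian word; incr and the 0xFF mask
// are then shifted into that lane before the masked lwarx/stwcx. update.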
llvm::MachineBasicBlock*
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI->getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator I = MBB;
  ++I;

  // Memory Reference
  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();

  unsigned DstReg = MI->getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(RC->hasType(MVT::i32) && "Invalid destination!");
  unsigned mainDstReg = MRI.createVirtualRegister(RC);
  unsigned restoreDstReg = MRI.createVirtualRegister(RC);

  MVT PVT = getPointerTy();
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");
  // For v = setjmp(buf), we generate
  //
  // thisMBB:
  //  SjLjSetup mainMBB
  //  bl mainMBB
  //  v_restore = 1
  //  b sinkMBB
  //
  // mainMBB:
  //  buf[LabelOffset] = LR
  //  v_main = 0
  //
  // sinkMBB:
  //  v = phi(main, restore)
  //

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, sinkMBB);

  MachineInstrBuilder MIB;

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Note that the structure of the jmp_buf used here is not compatible
  // with that used by libc, and is not designed to be. Specifically, it
  // stores only those 'reserved' registers that LLVM does not otherwise
  // understand how to spill. Also, by convention, by the time this
  // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and stack address in the third. Following the
  // X86 target code, we'll store the jump address in the second slot. We also
  // need to save the TOC pointer (R2) to handle jumps between shared
  // libraries, and that will be stored in the fourth slot. The thread
  // identifier (R13) is not affected.

  // thisMBB:
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  // Prepare IP either in reg.
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
  unsigned BufReg = MI->getOperand(1).getReg();

  if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
            .addReg(PPC::X2)
            .addImm(TOCOffset)
            .addReg(BufReg);
    MIB.setMemRefs(MMOBegin, MMOEnd);
  }

  // Naked functions never have a base pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned BaseReg;
  if (MF->getFunction()->hasFnAttribute(Attribute::Naked))
    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  else
    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;

  MIB = BuildMI(*thisMBB, MI, DL,
                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
            .addReg(BaseReg)
            .addImm(BPOffset)
            .addReg(BufReg);
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Setup
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
  MIB.addRegMask(TRI->getNoPreservedMask());

  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);

  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
          .addMBB(mainMBB);
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);

  thisMBB->addSuccessor(mainMBB, /* weight */ 0);
  thisMBB->addSuccessor(sinkMBB, /* weight */ 1);

  // mainMBB:
  //  mainDstReg = 0
  MIB =
      BuildMI(mainMBB, DL,
              TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);

  // Store IP
  if (Subtarget.isPPC64()) {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
  mainMBB->addSuccessor(sinkMBB);

  // sinkMBB:
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
          TII->get(PPC::PHI), DstReg)
    .addReg(mainDstReg).addMBB(mainMBB)
    .addReg(restoreDstReg).addMBB(thisMBB);

  MI->eraseFromParent();
  return sinkMBB;
}
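// For reference, the jmp_buf slot layout implied by the offsets above and
// reloaded in emitEHSjLjLongJmp below (one pointer-sized slot each):
//   slot 0: frame address (stored by the front end)
//   slot 1: jump address / saved LR (LabelOffset)
//   slot 2: stack pointer (stored by the front end, reloaded via SPOffset)
//   slot 3: TOC pointer, r2 (TOCOffset)
//   slot 4: base pointer (BPOffset)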
MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
                                     MachineBasicBlock *MBB) const {
  DebugLoc DL = MI->getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  // Memory Reference
  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();

  MVT PVT = getPointerTy();
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");

  const TargetRegisterClass *RC =
    (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  unsigned Tmp = MRI.createVirtualRegister(RC);
  // Since FP is only updated here but NOT referenced, it's treated as GPR.
  unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
  unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
  unsigned BP =
      (PVT == MVT::i64)
          ? PPC::X30
          : (Subtarget.isSVR4ABI() &&
                     MF->getTarget().getRelocationModel() == Reloc::PIC_
                 ? PPC::R29
                 : PPC::R30);

  MachineInstrBuilder MIB;

  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t SPOffset    = 2 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  unsigned BufReg = MI->getOperand(0).getReg();

  // Reload FP (the jumped-to function may not have had a
  // frame pointer, and if so, then its r31 will be restored
  // as necessary).
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
            .addImm(0)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
            .addImm(0)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload IP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
            .addImm(LabelOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload SP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
            .addImm(SPOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
            .addImm(SPOffset)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload BP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
            .addImm(BPOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
            .addImm(BPOffset)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload TOC
  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
            .addImm(TOCOffset)
            .addReg(BufReg);
    MIB.setMemRefs(MMOBegin, MMOEnd);
  }

  // Jump
  BuildMI(*MBB, MI, DL,
          TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));

  MI->eraseFromParent();
  return MBB;
}
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                               MachineBasicBlock *BB) const {
  if (MI->getOpcode() == TargetOpcode::STACKMAP ||
      MI->getOpcode() == TargetOpcode::PATCHPOINT) {
    if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
        MI->getOpcode() == TargetOpcode::PATCHPOINT) {
      // Call lowering should have added an r2 operand to indicate a dependence
      // on the TOC base pointer value. It can't however, because there is no
      // way to mark the dependence as implicit there, and so the stackmap code
      // will confuse it with a regular operand. Instead, add the dependence
      // here.
      setUsesTOCBasePtr(*BB->getParent());
      MI->addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
    }

    return emitPatchPoint(MI, BB);
  }

  if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
      MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
    return emitEHSjLjSetJmp(MI, BB);
  } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
             MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
    return emitEHSjLjLongJmp(MI, BB);
  }

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  // To "insert" these instructions we actually have to insert their
  // control-flow patterns.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = BB;
  ++It;

  MachineFunction *F = BB->getParent();

  if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
                              MI->getOpcode() == PPC::SELECT_CC_I8 ||
                              MI->getOpcode() == PPC::SELECT_I4 ||
                              MI->getOpcode() == PPC::SELECT_I8)) {
    SmallVector<MachineOperand, 2> Cond;
    if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
        MI->getOpcode() == PPC::SELECT_CC_I8)
      Cond.push_back(MI->getOperand(4));
    else
      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
    Cond.push_back(MI->getOperand(1));

    DebugLoc dl = MI->getDebugLoc();
    TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
                      Cond, MI->getOperand(2).getReg(),
                      MI->getOperand(3).getReg());
  } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
             MI->getOpcode() == PPC::SELECT_CC_I8 ||
             MI->getOpcode() == PPC::SELECT_CC_F4 ||
             MI->getOpcode() == PPC::SELECT_CC_F8 ||
             MI->getOpcode() == PPC::SELECT_CC_QFRC ||
             MI->getOpcode() == PPC::SELECT_CC_QSRC ||
             MI->getOpcode() == PPC::SELECT_CC_QBRC ||
             MI->getOpcode() == PPC::SELECT_CC_VRRC ||
             MI->getOpcode() == PPC::SELECT_CC_VSFRC ||
             MI->getOpcode() == PPC::SELECT_CC_VSSRC ||
             MI->getOpcode() == PPC::SELECT_CC_VSRC ||
             MI->getOpcode() == PPC::SELECT_I4 ||
             MI->getOpcode() == PPC::SELECT_I8 ||
             MI->getOpcode() == PPC::SELECT_F4 ||
             MI->getOpcode() == PPC::SELECT_F8 ||
             MI->getOpcode() == PPC::SELECT_QFRC ||
             MI->getOpcode() == PPC::SELECT_QSRC ||
             MI->getOpcode() == PPC::SELECT_QBRC ||
             MI->getOpcode() == PPC::SELECT_VRRC ||
             MI->getOpcode() == PPC::SELECT_VSFRC ||
             MI->getOpcode() == PPC::SELECT_VSSRC ||
             MI->getOpcode() == PPC::SELECT_VSRC) {
    // The incoming instruction knows the destination vreg to set, the
    // condition code register to branch on, the true/false values to
    // select between, and a branch opcode to use.

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC copy1MBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    DebugLoc dl = MI->getDebugLoc();
    F->insert(It, copy0MBB);
    F->insert(It, sinkMBB);

    // Transfer the remainder of BB and its successor edges to sinkMBB.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

    // Next, add the true and fallthrough blocks as its successors.
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    if (MI->getOpcode() == PPC::SELECT_I4 ||
        MI->getOpcode() == PPC::SELECT_I8 ||
        MI->getOpcode() == PPC::SELECT_F4 ||
        MI->getOpcode() == PPC::SELECT_F8 ||
        MI->getOpcode() == PPC::SELECT_QFRC ||
        MI->getOpcode() == PPC::SELECT_QSRC ||
        MI->getOpcode() == PPC::SELECT_QBRC ||
        MI->getOpcode() == PPC::SELECT_VRRC ||
        MI->getOpcode() == PPC::SELECT_VSFRC ||
        MI->getOpcode() == PPC::SELECT_VSSRC ||
        MI->getOpcode() == PPC::SELECT_VSRC) {
      BuildMI(BB, dl, TII->get(PPC::BC))
        .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
    } else {
      unsigned SelectPred = MI->getOperand(4).getImm();
      BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
    }

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(*BB, BB->begin(), dl,
            TII->get(PPC::PHI), MI->getOperand(0).getReg())
      .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
  } else if (MI->getOpcode() == PPC::ReadTB) {
    // To read the 64-bit time-base register on a 32-bit target, we read the
    // two halves. Should the counter have wrapped while it was being read, we
    // need to try again.
    // ...
    // readLoop:
    // mfspr Rx,TBU # load from TBU
    // mfspr Ry,TB # load from TB
    // mfspr Rz,TBU # load from TBU
    // cmpw crX,Rx,Rz # check if 'old'='new'
    // bne readLoop # branch if they're not equal
    // ...

    MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    DebugLoc dl = MI->getDebugLoc();
    F->insert(It, readMBB);
    F->insert(It, sinkMBB);

    // Transfer the remainder of BB and its successor edges to sinkMBB.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

    BB->addSuccessor(readMBB);
    BB = readMBB;

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
    unsigned LoReg = MI->getOperand(0).getReg();
    unsigned HiReg = MI->getOperand(1).getReg();
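    // (SPR 269 is TBU, the upper half of the time base, and SPR 268 is TB,
    // the lower half; hence the immediates on the mfspr instructions below.)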
    BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
    BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
    BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);

    unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);

    BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
      .addReg(HiReg).addReg(ReadAgainReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB);

    BB->addSuccessor(readMBB);
    BB->addSuccessor(sinkMBB);
  }
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);

  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
           MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
           (Subtarget.hasPartwordAtomics() &&
            MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
           (Subtarget.hasPartwordAtomics() &&
            MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
    bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;

    auto LoadMnemonic = PPC::LDARX;
    auto StoreMnemonic = PPC::STDCX;
    switch(MI->getOpcode()) {
    default:
      llvm_unreachable("Compare and swap of unknown size");
    case PPC::ATOMIC_CMP_SWAP_I8:
      LoadMnemonic = PPC::LBARX;
      StoreMnemonic = PPC::STBCX;
      assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
      break;
    case PPC::ATOMIC_CMP_SWAP_I16:
      LoadMnemonic = PPC::LHARX;
      StoreMnemonic = PPC::STHCX;
      assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
      break;
    case PPC::ATOMIC_CMP_SWAP_I32:
      LoadMnemonic = PPC::LWARX;
      StoreMnemonic = PPC::STWCX;
      break;
    case PPC::ATOMIC_CMP_SWAP_I64:
      LoadMnemonic = PPC::LDARX;
      StoreMnemonic = PPC::STDCX;
      break;
    }
    unsigned dest   = MI->getOperand(0).getReg();
    unsigned ptrA   = MI->getOperand(1).getReg();
    unsigned ptrB   = MI->getOperand(2).getReg();
    unsigned oldval = MI->getOperand(3).getReg();
    unsigned newval = MI->getOperand(4).getReg();
    DebugLoc dl     = MI->getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, midMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    //  thisMBB:
    //   ...
    //   fallthrough --> loopMBB
    BB->addSuccessor(loop1MBB);

    // loop1MBB:
    //   l[bhwd]arx dest, ptr
    //   cmp[wd] dest, oldval
    //   bne- midMBB
    // loop2MBB:
    //   st[bhwd]cx. newval, ptr
    //   bne- loopMBB
    //   b exitBB
    // midMBB:
    //   st[bhwd]cx. dest, ptr
    //   b exitBB
    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
      .addReg(ptrA).addReg(ptrB);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
      .addReg(oldval).addReg(dest);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(midMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(StoreMnemonic))
      .addReg(newval).addReg(ptrA).addReg(ptrB);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    BB = midMBB;
    BuildMI(BB, dl, TII->get(StoreMnemonic))
      .addReg(dest).addReg(ptrA).addReg(ptrB);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    //   ...
    BB = exitMBB;
  } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
             MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
    // We must use 64-bit registers for addresses when targeting 64-bit,
    // since we're actually doing arithmetic on them.  Other registers
    // can be 32-bit.
    bool is64bit = Subtarget.isPPC64();
    bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;

    unsigned dest   = MI->getOperand(0).getReg();
    unsigned ptrA   = MI->getOperand(1).getReg();
    unsigned ptrB   = MI->getOperand(2).getReg();
    unsigned oldval = MI->getOperand(3).getReg();
    unsigned newval = MI->getOperand(4).getReg();
    DebugLoc dl     = MI->getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, midMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
                                            : &PPC::GPRCRegClass;
    unsigned PtrReg = RegInfo.createVirtualRegister(RC);
    unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
    unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
    unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
    unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
    unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
    unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
    unsigned MaskReg = RegInfo.createVirtualRegister(RC);
    unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
    unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
    unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
    unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
    unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
    unsigned Ptr1Reg;
    unsigned TmpReg = RegInfo.createVirtualRegister(RC);
    unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
    //  thisMBB:
    //   ...
    //   fallthrough --> loopMBB
    BB->addSuccessor(loop1MBB);

    // The 4-byte load must be aligned, while a char or short may be
    // anywhere in the word.  Hence all this nasty bookkeeping code.
    //   add ptr1, ptrA, ptrB [copy if ptrA==0]
    //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
    //   xori shift, shift1, 24 [16]
    //   rlwinm ptr, ptr1, 0, 0, 29
    //   slw newval2, newval, shift
    //   slw oldval2, oldval,shift
    //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
    //   slw mask, mask2, shift
    //   and newval3, newval2, mask
    //   and oldval3, oldval2, mask
    // loop1MBB:
    //   lwarx tmpDest, ptr
    //   and tmp, tmpDest, mask
    //   cmpw tmp, oldval3
    //   bne- midMBB
    // loop2MBB:
    //   andc tmp2, tmpDest, mask
    //   or tmp4, tmp2, newval3
    //   stwcx. tmp4, ptr
    //   bne- loop1MBB
    //   b exitBB
    // midMBB:
    //   stwcx. tmpDest, ptr
    //   b exitBB
    //   srw dest, tmpDest, shift
    if (ptrA != ZeroReg) {
      Ptr1Reg = RegInfo.createVirtualRegister(RC);
      BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
        .addReg(ptrA).addReg(ptrB);
    } else {
      Ptr1Reg = ptrB;
    }
    BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
        .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
        .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
    if (is64bit)
      BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
          .addReg(Ptr1Reg).addImm(0).addImm(61);
    else
      BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
          .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
    BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
        .addReg(newval).addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
        .addReg(oldval).addReg(ShiftReg);
    if (is8bit)
      BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
    else {
      BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
      BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
          .addReg(Mask3Reg).addImm(65535);
    }
    BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
        .addReg(Mask2Reg).addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
        .addReg(NewVal2Reg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
        .addReg(OldVal2Reg).addReg(MaskReg);

    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
        .addReg(ZeroReg).addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
        .addReg(TmpDestReg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
        .addReg(TmpReg).addReg(OldVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(midMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
        .addReg(TmpDestReg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
        .addReg(Tmp2Reg).addReg(NewVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
        .addReg(ZeroReg).addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    BB = midMBB;
    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
        .addReg(ZeroReg).addReg(PtrReg);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    //   ...
    BB = exitMBB;
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
        .addReg(ShiftReg);
  } else if (MI->getOpcode() == PPC::FADDrtz) {
    // This pseudo performs an FADD with rounding mode temporarily forced
    // to round-to-zero. We emit this via custom inserter since the FPSCR
    // is not modeled at the SelectionDAG level.
    unsigned Dest = MI->getOperand(0).getReg();
    unsigned Src1 = MI->getOperand(1).getReg();
    unsigned Src2 = MI->getOperand(2).getReg();
    DebugLoc dl   = MI->getDebugLoc();

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);

    // Save FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);

    // Set rounding mode to round-to-zero.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);

    // Perform addition.
    BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);

    // Restore FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
  } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
             MI->getOpcode() == PPC::ANDIo_1_GT_BIT ||
             MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
             MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) {
    unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
                       MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ?
                      PPC::ANDIo8 : PPC::ANDIo;
    bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
                 MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8);

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
                                                  &PPC::GPRCRegClass :
                                                  &PPC::G8RCRegClass);

    DebugLoc dl = MI->getDebugLoc();
    BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
      .addReg(MI->getOperand(1).getReg()).addImm(1);
    BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
            MI->getOperand(0).getReg())
      .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
  } else if (MI->getOpcode() == PPC::TCHECK_RET) {
    DebugLoc Dl = MI->getDebugLoc();
    MachineRegisterInfo &RegInfo = F->getRegInfo();
    unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
    BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
    return BB;
  } else {
    llvm_unreachable("Unexpected instr type to insert");
  }

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return BB;
}
//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
                                            DAGCombinerInfo &DCI,
                                            unsigned &RefinementSteps,
                                            bool &UseOneConstNR) const {
  EVT VT = Operand.getValueType();
  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
      (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
      (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
      (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
      (VT == MVT::v4f64 && Subtarget.hasQPX())) {
    // Convergence is quadratic, so we essentially double the number of digits
    // correct after every iteration. For both FRE and FRSQRTE, the minimum
    // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
    // 2^-14. IEEE float has 23 digits and double has 52 digits.
    RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
    if (VT.getScalarType() == MVT::f64)
      RefinementSteps++;
    UseOneConstNR = true;
    return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
  }
  return SDValue();
}
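// A sketch of the arithmetic behind the step counts above: each
// Newton-Raphson iteration computes x' = x * (1.5 - 0.5 * a * x * x),
// roughly squaring the relative error. From 2^-5, three steps give ~2^-40
// (covering f32's 24 significand bits) and the extra f64 step ~2^-80; from
// 2^-14 with hasRecipPrec(), one step gives ~2^-28 and two give ~2^-56.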
SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
                                            DAGCombinerInfo &DCI,
                                            unsigned &RefinementSteps) const {
  EVT VT = Operand.getValueType();
  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
      (VT == MVT::f64 && Subtarget.hasFRE()) ||
      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
      (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
      (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
      (VT == MVT::v4f64 && Subtarget.hasQPX())) {
    // Convergence is quadratic, so we essentially double the number of digits
    // correct after every iteration. For both FRE and FRSQRTE, the minimum
    // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
    // 2^-14. IEEE float has 23 digits and double has 52 digits.
    RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
    if (VT.getScalarType() == MVT::f64)
      RefinementSteps++;
    return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
  }
  return SDValue();
}
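// The corresponding refinement for the FRE estimate is x' = x * (2 - a * x),
// with the same quadratic error behavior as the rsqrt case sketched above.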
bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
  // Note: This functionality is used only when unsafe-fp-math is enabled, and
  // on cores with reciprocal estimates (which are used when unsafe-fp-math is
  // enabled for division), this functionality is redundant with the default
  // combiner logic (once the division -> reciprocal/multiply transformation
  // has taken place). As a result, this matters more for older cores than for
  // newer ones.

  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
  // reciprocal if there are two or more FDIVs (for embedded cores with only
  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
  switch (Subtarget.getDarwinDirective()) {
  default:
    return NumUsers > 2;
  case PPC::DIR_440:
  case PPC::DIR_A2:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
    return NumUsers > 1;
  }
}
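// Illustrative example of the combine this hook gates: with NumUsers == 3,
//   x = a/d; y = b/d; z = c/d
// can be rewritten by the combiner as
//   r = 1.0/d; x = a*r; y = b*r; z = c*r
// trading three divides for one divide and three multiplies.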
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
                               unsigned Bytes, int Dist,
                               SelectionDAG &DAG) {
  if (VT.getSizeInBits() / 8 != Bytes)
    return false;

  SDValue BaseLoc = Base->getBasePtr();
  if (Loc.getOpcode() == ISD::FrameIndex) {
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS  = MFI->getObjectSize(FI);
    int BFS = MFI->getObjectSize(BFI);
    if (FS != BFS || FS != (int)Bytes) return false;
    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
  }

  // Handle X+C.
  if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
      cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
    return true;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const GlobalValue *GV1 = nullptr;
  const GlobalValue *GV2 = nullptr;
  int64_t Offset1 = 0;
  int64_t Offset2 = 0;
  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
  if (isGA1 && isGA2 && GV1 == GV2)
    return Offset1 == (Offset2 + Dist*Bytes);

  return false;
}
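// Example: with Bytes == 4 and Dist == 1, a location X+4 is consecutive to a
// base location X, as are two 4-byte frame objects whose object offsets
// differ by exactly 4, or addresses @g+4 and @g off the same global.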
// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
// not enforce equality of the chain operands.
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
                            unsigned Bytes, int Dist,
                            SelectionDAG &DAG) {
  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
    EVT VT = LS->getMemoryVT();
    SDValue Loc = LS->getBasePtr();
    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
  }

  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default: return false;
    case Intrinsic::ppc_qpx_qvlfd:
    case Intrinsic::ppc_qpx_qvlfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfs:
    case Intrinsic::ppc_qpx_qvlfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcd:
    case Intrinsic::ppc_qpx_qvlfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcs:
    case Intrinsic::ppc_qpx_qvlfcsa:
      VT = MVT::v2f32;
      break;
    case Intrinsic::ppc_qpx_qvlfiwa:
    case Intrinsic::ppc_qpx_qvlfiwz:
    case Intrinsic::ppc_altivec_lvx:
    case Intrinsic::ppc_altivec_lvxl:
    case Intrinsic::ppc_vsx_lxvw4x:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
  }

  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default: return false;
    case Intrinsic::ppc_qpx_qvstfd:
    case Intrinsic::ppc_qpx_qvstfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfs:
    case Intrinsic::ppc_qpx_qvstfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcd:
    case Intrinsic::ppc_qpx_qvstfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcs:
    case Intrinsic::ppc_qpx_qvstfcsa:
      VT = MVT::v2f32;
      break;
    case Intrinsic::ppc_qpx_qvstfiw:
    case Intrinsic::ppc_qpx_qvstfiwa:
    case Intrinsic::ppc_altivec_stvx:
    case Intrinsic::ppc_altivec_stvxl:
    case Intrinsic::ppc_vsx_stxvw4x:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
  }

  return false;
}
// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true result
// indicates that it is safe to create a new consecutive load adjacent to the
// load provided.
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
  SDValue Chain = LD->getChain();
  EVT VT = LD->getMemoryVT();

  SmallSet<SDNode *, 16> LoadRoots;
  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
  SmallSet<SDNode *, 16> Visited;

  // First, search up the chain, branching to follow all token-factor operands.
  // If we find a consecutive load, then we're done, otherwise, record all
  // nodes just above the top-level loads and token factors.
  while (!Queue.empty()) {
    SDNode *ChainNext = Queue.pop_back_val();
    if (!Visited.insert(ChainNext).second)
      continue;

    if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
        return true;

      if (!Visited.count(ChainLD->getChain().getNode()))
        Queue.push_back(ChainLD->getChain().getNode());
    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
      for (const SDUse &O : ChainNext->ops())
        if (!Visited.count(O.getNode()))
          Queue.push_back(O.getNode());
    } else
      LoadRoots.insert(ChainNext);
  }

  // Second, search down the chain, starting from the top-level nodes recorded
  // in the first phase. These top-level nodes are the nodes just above all
  // loads and token factors. Starting with their uses, recursively look through
  // all loads (just the chain uses) and token factors to find a consecutive
  // load.
  Visited.clear();
  Queue.clear();

  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
       IE = LoadRoots.end(); I != IE; ++I) {
    Queue.push_back(*I);

    while (!Queue.empty()) {
      SDNode *LoadRoot = Queue.pop_back_val();
      if (!Visited.insert(LoadRoot).second)
        continue;

      if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
        if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
          return true;

      for (SDNode::use_iterator UI = LoadRoot->use_begin(),
           UE = LoadRoot->use_end(); UI != UE; ++UI)
        if (((isa<MemSDNode>(*UI) &&
              cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
             UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
          Queue.push_back(*UI);
    }
  }

  return false;
}
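// For instance, given a 4-byte load of address X, this returns true when a
// memory operation at X+4 is reachable from X's chain purely through loads
// and token factors, in either direction along the chain.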
SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
  // If we're tracking CR bits, we need to be careful that we don't have:
  //   trunc(binary-ops(zext(x), zext(y)))
  // or
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
  // such that we're unnecessarily moving things into GPRs when it would be
  // better to keep them in CR bits.

  // Note that trunc here can be an actual i1 trunc, or can be the effective
  // truncation that comes from a setcc or select_cc.
  if (N->getOpcode() == ISD::TRUNCATE &&
      N->getValueType(0) != MVT::i1)
    return SDValue();

  if (N->getOperand(0).getValueType() != MVT::i32 &&
      N->getOperand(0).getValueType() != MVT::i64)
    return SDValue();

  if (N->getOpcode() == ISD::SETCC ||
      N->getOpcode() == ISD::SELECT_CC) {
    // If we're looking at a comparison, then we need to make sure that the
    // high bits (all except for the first) don't affect the result.
    ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(
        N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();

    if (ISD::isSignedIntSetCC(CC)) {
      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
        return SDValue();
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      if (!DAG.MaskedValueIsZero(N->getOperand(0),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
          !DAG.MaskedValueIsZero(N->getOperand(1),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
        return SDValue();
    } else {
      // This is neither a signed nor an unsigned comparison, just make sure
      // that the high bits are equal.
      APInt Op1Zero, Op1One;
      APInt Op2Zero, Op2One;
      DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One);
      DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One);

      // We don't really care about what is known about the first bit (if
      // anything), so clear it in all masks prior to comparing them.
      Op1Zero.clearBit(0); Op1One.clearBit(0);
      Op2Zero.clearBit(0); Op2One.clearBit(0);

      if (Op1Zero != Op2Zero || Op1One != Op2One)
        return SDValue();
    }
  }

  // We now know that the higher-order bits are irrelevant, we just need to
  // make sure that all of the intermediate operations are bit operations, and
  // all inputs are extensions.
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR  &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
      N->getOperand(1).getOpcode() != ISD::AND &&
      N->getOperand(1).getOpcode() != ISD::OR  &&
      N->getOperand(1).getOpcode() != ISD::XOR &&
      N->getOperand(1).getOpcode() != ISD::SELECT &&
      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps, PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  for (unsigned i = 0; i < 2; ++i) {
    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
         N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
        isa<ConstantSDNode>(N->getOperand(i)))
      Inputs.push_back(N->getOperand(i));
    else
      BinOps.push_back(N->getOperand(i));

    if (N->getOpcode() == ISD::TRUNCATE)
      break;
  }

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by extensions.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.back();
    BinOps.pop_back();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
           BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not an extension or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
         UE = Inputs[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i] ||
            User->getOperand(1) == Inputs[i])
          return SDValue();
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
         UE = PromOps[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i] ||
            User->getOperand(1) == PromOps[i])
          return SDValue();
      }
    }
  }

  // Replace all inputs with the extension operand.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constants may have users outside the cluster of to-be-promoted nodes,
    // and so we need to replace those as we do the promotions.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
  }

  // Replace all operations (these are all the same, but have a different
  // (i1) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first. Any intermediate truncations or
  // extensions disappear.
  while (!PromOps.empty()) {
    SDValue PromOp = PromOps.back();
    PromOps.pop_back();

    if (PromOp.getOpcode() == ISD::TRUNCATE ||
        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
        PromOp.getOpcode() == ISD::ANY_EXTEND) {
      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
          PromOp.getOperand(0).getValueType() != MVT::i1) {
        // The operand is not yet ready (see comment below).
        PromOps.insert(PromOps.begin(), PromOp);
        continue;
      }

      SDValue RepValue = PromOp.getOperand(0);
      if (isa<ConstantSDNode>(RepValue))
        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);

      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
      continue;
    }

    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != MVT::i1) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOps.insert(PromOps.begin(), PromOp);
      continue;
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If there are any constant inputs, make sure they're replaced now.
    for (unsigned i = 0; i < 2; ++i)
      if (isa<ConstantSDNode>(Ops[C+i]))
        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
  }

  // Now we're left with the initial truncation itself.
  if (N->getOpcode() == ISD::TRUNCATE)
    return N->getOperand(0);

  // Otherwise, this is a comparison. The operands to be compared have just
  // changed type (to i1), but everything else is the same.
  return SDValue(N, 0);
}
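// Illustrative example of the combine above: for
//   %t = trunc i32 (xor (zext i1 %a), (zext i1 %b)) to i1
// the zexts are bypassed, the xor is rebuilt directly on the i1 values (so
// they can stay in CR bits), and the trunc collapses to that i1 xor.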
9497 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
9498 DAGCombinerInfo &DCI) const {
9499 SelectionDAG &DAG = DCI.DAG;
9502 // If we're tracking CR bits, we need to be careful that we don't have:
9503 // zext(binary-ops(trunc(x), trunc(y)))
9505 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
9506 // such that we're unnecessarily moving things into CR bits that can more
9507 // efficiently stay in GPRs. Note that if we're not certain that the high
9508 // bits are set as required by the final extension, we still may need to do
9509 // some masking to get the proper behavior.
9511 // This same functionality is important on PPC64 when dealing with
9512 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
9513 // the return values of functions. Because it is so similar, it is handled

  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return SDValue();

  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
        (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
    return SDValue();

  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by truncations.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.back();
    BinOps.pop_back();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not a truncation or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // The operands of a select that must be truncated when the select is
  // promoted because the operand is actually part of the to-be-promoted set.
  DenseMap<SDNode *, EVT> SelectTruncOp[2];

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
         UE = Inputs[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) of SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == Inputs[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
         UE = PromOps[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) of SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == PromOps[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
  bool ReallyNeedsExt = false;
  if (N->getOpcode() != ISD::ANY_EXTEND) {
    // If all of the inputs are not already sign/zero extended, then
    // we'll still need to do that at the end.
    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
      if (isa<ConstantSDNode>(Inputs[i]))
        continue;

      unsigned OpBits =
        Inputs[i].getOperand(0).getValueSizeInBits();
      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");

      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
           !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
                                  APInt::getHighBitsSet(OpBits,
                                                        OpBits-PromBits))) ||
          (N->getOpcode() == ISD::SIGN_EXTEND &&
           DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
             (OpBits-(PromBits-1)))) {
        ReallyNeedsExt = true;
        break;
      }
    }
  }

  // Replace all inputs, either with the truncation operand, or a
  // truncation or extension to the final output type.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constant inputs need to be replaced with the to-be-promoted nodes that
    // use them because they might have users outside of the cluster of
    // promoted nodes.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    SDValue InSrc = Inputs[i].getOperand(0);
    if (Inputs[i].getValueType() == N->getValueType(0))
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
    else if (N->getOpcode() == ISD::SIGN_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else if (N->getOpcode() == ISD::ZERO_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
  }

  // Replace all operations (these are all the same, but have a different
  // (promoted) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first.
  while (!PromOps.empty()) {
    SDValue PromOp = PromOps.back();
    PromOps.pop_back();

    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOps.insert(PromOps.begin(), PromOp);
      continue;
    }

    // For SELECT and SELECT_CC nodes, we do a similar check for any
    // to-be-promoted comparison inputs.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      if ((SelectTruncOp[0].count(PromOp.getNode()) &&
           PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
          (SelectTruncOp[1].count(PromOp.getNode()) &&
           PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
        PromOps.insert(PromOps.begin(), PromOp);
        continue;
      }
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If this node has constant inputs, then they'll need to be promoted here.
    for (unsigned i = 0; i < 2; ++i) {
      if (!isa<ConstantSDNode>(Ops[C+i]))
        continue;
      if (Ops[C+i].getValueType() == N->getValueType(0))
        continue;

      if (N->getOpcode() == ISD::SIGN_EXTEND)
        Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else if (N->getOpcode() == ISD::ZERO_EXTEND)
        Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else
        Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
    }

    // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
    // truncate them again to the original value type.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
      if (SI0 != SelectTruncOp[0].end())
        Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
      auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
      if (SI1 != SelectTruncOp[1].end())
        Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
    }

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
  }

  // Now we're left with the initial extension itself.
  if (!ReallyNeedsExt)
    return N->getOperand(0);

  // To zero extend, just mask off everything except for the first bit (in the
  // i1 case).
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
                       DAG.getConstant(APInt::getLowBitsSet(
                                         N->getValueSizeInBits(0), PromBits),
                                       dl, N->getValueType(0)));
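
  // For sign extension, shift the value left so that the top bit of the
  // promoted portion becomes the most-significant bit, then arithmetic
  // shift right by the same amount to replicate it through the high bits.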
  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
         "Invalid extension type");
  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0));
  SDValue ShiftCst =
    DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
  return DAG.getNode(ISD::SRA, dl, N->getValueType(0),
                     DAG.getNode(ISD::SHL, dl, N->getValueType(0),
                                 N->getOperand(0), ShiftCst), ShiftCst);
}

SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  if (!Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // Don't handle ppc_fp128 here or i1 conversions.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();
  if (Op.getOperand(0).getValueType() == MVT::i1)
    return SDValue();

  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (Op.getOperand(0).getValueType() == MVT::i32)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                     ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                          : PPCISD::FCFIDS)
                     : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                          : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                ? MVT::f32
                : MVT::f64;
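
  // Note: the FCFID* nodes convert a doubleword integer held in an FPR to
  // floating point; the U forms treat the input as unsigned and the S forms
  // produce single precision directly. The U and S forms require the FPCVT
  // instructions (POWER7 and later).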

  // If we're converting from a float, to an int, and back to a float again,
  // then we don't need the store/load pair at all.
  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
       Subtarget.hasFPCVT()) ||
      (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
    SDValue Src = Op.getOperand(0).getOperand(0);
    if (Src.getValueType() == MVT::f32) {
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
      DCI.AddToWorklist(Src.getNode());
    }

    unsigned FCTOp =
      Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ;

    SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);

    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
      DCI.AddToWorklist(FP.getNode());
    }

    return FP;
  }

  return SDValue();
}

// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
// builtins) into loads with swaps.
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX load");
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    MMO = LD->getMemOperand();
    // If the MMO suggests this isn't a load of a full vector, leave
    // things alone. For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (MMO->getSize() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    Base = Intrin->getBasePtr();
    MMO = Intrin->getMemOperand();
    break;
  }
  }

  MVT VecTy = N->getValueType(0).getSimpleVT();
  SDValue LoadOps[] = { Chain, Base };
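
  // lxvd2x loads the two doublewords in big-endian element order regardless
  // of the target's endianness, so the xxswapd issued below restores the
  // element order that little-endian code expects.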
  SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
                                         DAG.getVTList(VecTy, MVT::Other),
                                         LoadOps, VecTy, MMO);
  DCI.AddToWorklist(Load.getNode());
  Chain = Load.getValue(1);
  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
                             DAG.getVTList(VecTy, MVT::Other), Chain, Load);
  DCI.AddToWorklist(Swap.getNode());
  return Swap;
}

// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
// builtins) into stores with swaps.
SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  unsigned SrcOpnd;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX store");
  case ISD::STORE: {
    StoreSDNode *ST = cast<StoreSDNode>(N);
    Chain = ST->getChain();
    Base = ST->getBasePtr();
    MMO = ST->getMemOperand();
    SrcOpnd = 1;
    // If the MMO suggests this isn't a store of a full vector, leave
    // things alone. For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (MMO->getSize() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_VOID: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    // Intrin->getBasePtr() oddly does not get what we want.
    Base = Intrin->getOperand(3);
    MMO = Intrin->getMemOperand();
    SrcOpnd = 2;
    break;
  }
  }

  SDValue Src = N->getOperand(SrcOpnd);
  MVT VecTy = Src.getValueType().getSimpleVT();
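
  // Swap the doublewords before storing: stxvd2x writes its source in
  // big-endian element order, so the xxswapd here makes the in-memory image
  // match what little-endian code expects.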
  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
                             DAG.getVTList(VecTy, MVT::Other), Chain, Src);
  DCI.AddToWorklist(Swap.getNode());
  Chain = Swap.getValue(1);
  SDValue StoreOps[] = { Chain, Swap, Base };
  SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
                                          DAG.getVTList(MVT::Other),
                                          StoreOps, VecTy, MMO);
  DCI.AddToWorklist(Store.getNode());
  return Store;
}

SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default: break;
  case ISD::SHL:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue())   // 0 << V -> 0.
        return N->getOperand(0);
    }
    break;
  case ISD::SRL:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue())   // 0 >>u V -> 0.
        return N->getOperand(0);
    }
    break;
  case ISD::SRA:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue() ||   // 0 >>s V -> 0.
          C->isAllOnesValue())  // -1 >>s V -> -1.
        return N->getOperand(0);
    }
    break;
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
    return DAGCombineExtBoolTrunc(N, DCI);
  case ISD::TRUNCATE:
  case ISD::SETCC:
  case ISD::SELECT_CC:
    return DAGCombineTruncBoolExt(N, DCI);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    return combineFPToIntToFP(N, DCI);
  case ISD::STORE: {
    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
    if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
        N->getOperand(1).getValueType() == MVT::i32 &&
        N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
      SDValue Val = N->getOperand(1).getOperand(0);
      if (Val.getValueType() == MVT::f32) {
        Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
        DCI.AddToWorklist(Val.getNode());
      }
      Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
      DCI.AddToWorklist(Val.getNode());
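
      // FCTIWZ leaves the 32-bit integer result in a floating-point
      // register, and STFIWX stores that word directly, so the value never
      // has to round-trip through a GPR or a stack temporary.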
      SDValue Ops[] = {
        N->getOperand(0), Val, N->getOperand(2),
        DAG.getValueType(N->getOperand(1).getValueType())
      };

      Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops,
              cast<StoreSDNode>(N)->getMemoryVT(),
              cast<StoreSDNode>(N)->getMemOperand());
      DCI.AddToWorklist(Val.getNode());
      return Val;
    }

    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
    if (cast<StoreSDNode>(N)->isUnindexed() &&
        N->getOperand(1).getOpcode() == ISD::BSWAP &&
        N->getOperand(1).getNode()->hasOneUse() &&
        (N->getOperand(1).getValueType() == MVT::i32 ||
         N->getOperand(1).getValueType() == MVT::i16 ||
         (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
          N->getOperand(1).getValueType() == MVT::i64))) {
      SDValue BSwapOp = N->getOperand(1).getOperand(0);
      // Do an any-extend to 32-bits if this is a half-word input.
      if (BSwapOp.getValueType() == MVT::i16)
        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);

      SDValue Ops[] = {
        N->getOperand(0), BSwapOp, N->getOperand(2),
        DAG.getValueType(N->getOperand(1).getValueType())
      };
      return
        DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
                                Ops, cast<StoreSDNode>(N)->getMemoryVT(),
                                cast<StoreSDNode>(N)->getMemOperand());
    }

    // For little endian, VSX stores require generating xxswapd/stxvd2x.
    EVT VT = N->getOperand(1).getValueType();
    if (VT.isSimple()) {
      MVT StoreVT = VT.getSimpleVT();
      if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
          (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
           StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
        return expandVSXStoreForLE(N, DCI);
    }

    break;
  }
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT VT = LD->getValueType(0);

    // For little endian, VSX loads require generating lxvd2x/xxswapd.
    if (VT.isSimple()) {
      MVT LoadVT = VT.getSimpleVT();
      if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
          (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
           LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
        return expandVSXLoadForLE(N, DCI);
    }

    EVT MemVT = LD->getMemoryVT();
    Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
    unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
    Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
    unsigned ScalarABIAlignment = getDataLayout()->getABITypeAlignment(STy);
    if (LD->isUnindexed() && VT.isVector() &&
        ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
          // P8 and later hardware should just use LOAD.
          !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
                                       VT == MVT::v4i32 || VT == MVT::v4f32)) ||
         (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
          LD->getAlignment() >= ScalarABIAlignment)) &&
        LD->getAlignment() < ABIAlignment) {
      // This is a type-legal unaligned Altivec or QPX load.
      SDValue Chain = LD->getChain();
      SDValue Ptr = LD->getBasePtr();
      bool isLittleEndian = Subtarget.isLittleEndian();

      // This implements the loading of unaligned vectors as described in
      // the venerable Apple Velocity Engine overview. Specifically:
      // https://developer.apple.com/hardwaredrivers/ve/alignment.html
      // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
      //
      // The general idea is to expand a sequence of one or more unaligned
      // loads into an alignment-based permutation-control instruction (lvsl
      // or lvsr), a series of regular vector loads (which always truncate
      // their input address to an aligned address), and a series of
      // permutations. The results of these permutations are the requested
      // loaded values. The trick is that the last "extra" load is not taken
      // from the address you might suspect (sizeof(vector) bytes after the
      // last requested load), but rather sizeof(vector) - 1 bytes after the
      // last requested vector. The point of this is to avoid a page fault if
      // the base address happened to be aligned. This works because if the
      // base address is aligned, then adding less than a full vector length
      // will cause the last vector in the sequence to be (re)loaded.
      // Otherwise, the next vector will be fetched as you might suspect was
      // necessary.

      // We might be able to reuse the permutation generation from
      // a different base address offset from this one by an aligned amount.
      // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
      // optimization later.
      Intrinsic::ID Intr, IntrLD, IntrPerm;
      MVT PermCntlTy, PermTy, LDTy;
      if (Subtarget.hasAltivec()) {
        Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr :
                                Intrinsic::ppc_altivec_lvsl;
        IntrLD = Intrinsic::ppc_altivec_lvx;
        IntrPerm = Intrinsic::ppc_altivec_vperm;
        PermCntlTy = MVT::v16i8;
        PermTy = MVT::v4i32;
        LDTy = MVT::v4i32;
      } else {
        Intr = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
                                     Intrinsic::ppc_qpx_qvlpcls;
        IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
                                       Intrinsic::ppc_qpx_qvlfs;
        IntrPerm = Intrinsic::ppc_qpx_qvfperm;
        PermCntlTy = MVT::v4f64;
        PermTy = MVT::v4f64;
        LDTy = MemVT.getSimpleVT();
      }

      SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);

      // Create the new MMO for the new base load. It is like the original MMO,
      // but represents an area in memory almost twice the vector size centered
      // on the original address. If the address is unaligned, we might start
      // reading up to (sizeof(vector)-1) bytes below the address of the
      // original unaligned load.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineMemOperand *BaseMMO =
        MF.getMachineMemOperand(LD->getMemOperand(), -MemVT.getStoreSize()+1,
                                2*MemVT.getStoreSize()-1);

      // Create the new base load.
      SDValue LDXIntID = DAG.getTargetConstant(IntrLD, dl, getPointerTy());
      SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
      SDValue BaseLoad =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                DAG.getVTList(PermTy, MVT::Other),
                                BaseLoadOps, LDTy, BaseMMO);

      // Note that the value of IncOffset (which is provided to the next
      // load's pointer info offset value, and thus used to calculate the
      // alignment), and the value of IncValue (which is actually used to
      // increment the pointer value) are different! This is because we
      // require the next load to appear to be aligned, even though it
      // is actually offset from the base pointer by a lesser amount.
      int IncOffset = VT.getSizeInBits() / 8;
      int IncValue = IncOffset;

      // Walk (both up and down) the chain looking for another load at the real
      // (aligned) offset (the alignment of the other load does not matter in
      // this case). If found, then do not use the offset reduction trick, as
      // that will prevent the loads from being later combined (as they would
      // otherwise be duplicates).
      if (!findConsecutiveLoad(LD, DAG))
        --IncValue;

      SDValue Increment = DAG.getConstant(IncValue, dl, getPointerTy());
      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);

      MachineMemOperand *ExtraMMO =
        MF.getMachineMemOperand(LD->getMemOperand(),
                                1, 2*MemVT.getStoreSize()-1);
      SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
      SDValue ExtraLoad =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                DAG.getVTList(PermTy, MVT::Other),
                                ExtraLoadOps, LDTy, ExtraMMO);

      SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                               BaseLoad.getValue(1), ExtraLoad.getValue(1));

      // Because vperm has a big-endian bias, we must reverse the order
      // of the input vectors and complement the permute control vector
      // when generating little endian code. We have already handled the
      // latter by using lvsr instead of lvsl, so just reverse BaseLoad
      // and ExtraLoad here.
      SDValue Perm;
      if (isLittleEndian)
        Perm = BuildIntrinsicOp(IntrPerm,
                                ExtraLoad, BaseLoad, PermCntl, DAG, dl);
      else
        Perm = BuildIntrinsicOp(IntrPerm,
                                BaseLoad, ExtraLoad, PermCntl, DAG, dl);

      if (VT != PermTy)
        Perm = Subtarget.hasAltivec() ?
                 DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
                 DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
                             DAG.getTargetConstant(1, dl, MVT::i64));
                     // second argument is 1 because this rounding
                     // is always exact.

      // The output of the permutation is our loaded result, the TokenFactor is
      // the new chain.
      DCI.CombineTo(N, Perm, TF);
      return SDValue(N, 0);
    }
    }
    break;
  case ISD::INTRINSIC_WO_CHAIN: {
    bool isLittleEndian = Subtarget.isLittleEndian();
    unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
                                         : Intrinsic::ppc_altivec_lvsl);
    if ((IID == Intr ||
         IID == Intrinsic::ppc_qpx_qvlpcld ||
         IID == Intrinsic::ppc_qpx_qvlpcls) &&
        N->getOperand(1)->getOpcode() == ISD::ADD) {
      SDValue Add = N->getOperand(1);

      int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
                 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
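
      // The permute control only depends on the address modulo the
      // alignment (its low 4 or 5 bits), so two pointers that differ by a
      // multiple of the alignment produce identical control vectors and the
      // instructions can be shared.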
      if (DAG.MaskedValueIsZero(
              Add->getOperand(1),
              APInt::getAllOnesValue(Bits /* alignment */)
                  .zext(
                      Add.getValueType().getScalarType().getSizeInBits()))) {
        SDNode *BasePtr = Add->getOperand(0).getNode();
        for (SDNode::use_iterator UI = BasePtr->use_begin(),
             UE = BasePtr->use_end();
             UI != UE; ++UI) {
          if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
              cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
            // We've found another LVSL/LVSR, and this address is an aligned
            // multiple of that one. The results will be the same, so use the
            // one we've just found instead.

            return SDValue(*UI, 0);
          }
        }
      }

      if (isa<ConstantSDNode>(Add->getOperand(1))) {
        SDNode *BasePtr = Add->getOperand(0).getNode();
        for (SDNode::use_iterator UI = BasePtr->use_begin(),
             UE = BasePtr->use_end(); UI != UE; ++UI) {
          if (UI->getOpcode() == ISD::ADD &&
              isa<ConstantSDNode>(UI->getOperand(1)) &&
              (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
               cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
              (1ULL << Bits) == 0) {
            SDNode *OtherAdd = *UI;
            for (SDNode::use_iterator VI = OtherAdd->use_begin(),
                 VE = OtherAdd->use_end(); VI != VE; ++VI) {
              if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
                  cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
                return SDValue(*VI, 0);
              }
            }
          }
        }
      }
    }

    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    // For little endian, VSX loads require generating lxvd2x/xxswapd.
    if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
      default:
        break;
      case Intrinsic::ppc_vsx_lxvw4x:
      case Intrinsic::ppc_vsx_lxvd2x:
        return expandVSXLoadForLE(N, DCI);
      }
    }
    break;
  }
  case ISD::INTRINSIC_VOID: {
    // For little endian, VSX stores require generating xxswapd/stxvd2x.
    if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
      default:
        break;
      case Intrinsic::ppc_vsx_stxvw4x:
      case Intrinsic::ppc_vsx_stxvd2x:
        return expandVSXStoreForLE(N, DCI);
      }
    }
    break;
  }
  case ISD::BSWAP:
    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
    if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
        N->getOperand(0).hasOneUse() &&
        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
         (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
          N->getValueType(0) == MVT::i64))) {
      SDValue Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);
      // Create the byte-swapping load.
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr(),  // Ptr
        DAG.getValueType(N->getValueType(0)) // VT
      };
      SDValue BSLoad =
        DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
                                DAG.getVTList(N->getValueType(0) == MVT::i64 ?
                                              MVT::i64 : MVT::i32, MVT::Other),
                                Ops, LD->getMemoryVT(), LD->getMemOperand());

      // If this is an i16 load, insert the truncate.
      SDValue ResVal = BSLoad;
      if (N->getValueType(0) == MVT::i16)
        ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);

      // First, combine the bswap away. This makes the value produced by the
      // load dead.
      DCI.CombineTo(N, ResVal);

      // Next, combine the load away; we give it a bogus result value but a
      // real chain result. The result value is dead because the bswap is dead.
      DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDValue(N, 0);
    }

    break;
  case PPCISD::VCMP: {
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6 and
    // a normal output).
    if (!N->getOperand(0).hasOneUse() &&
        !N->getOperand(1).hasOneUse() &&
        !N->getOperand(2).hasOneUse()) {

      // Scan all of the users of the LHS, looking for VCMPo's that match.
      SDNode *VCMPoNode = nullptr;

      SDNode *LHSN = N->getOperand(0).getNode();
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
           UI != E; ++UI)
        if (UI->getOpcode() == PPCISD::VCMPo &&
            UI->getOperand(1) == N->getOperand(1) &&
            UI->getOperand(2) == N->getOperand(2) &&
            UI->getOperand(0) == N->getOperand(0)) {
          VCMPoNode = *UI;
          break;
        }

      // If there is no VCMPo node, or if the flag value has a single use,
      // don't transform this.
      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
        break;

      // Look at the (necessarily single) use of the flag value. If it has a
      // chain, this transformation is more complex. Note that multiple things
      // could use the value result, which we should ignore.
      SDNode *FlagUser = nullptr;
      for (SDNode::use_iterator UI = VCMPoNode->use_begin();
           FlagUser == nullptr; ++UI) {
        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
        SDNode *User = *UI;
        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
          if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
            FlagUser = User;
            break;
          }
        }
      }

      // If the user is a MFOCRF instruction, we know this is safe.
      // Otherwise we give up for right now.
      if (FlagUser->getOpcode() == PPCISD::MFOCRF)
        return SDValue(VCMPoNode, 0);
    }
    break;
  }
  case ISD::BRCOND: {
    SDValue Cond = N->getOperand(1);
    SDValue Target = N->getOperand(2);

    if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero) {

      // We now need to make the intrinsic dead (it cannot be instruction
      // selected).
      DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
      assert(Cond.getNode()->hasOneUse() &&
             "Counter decrement has more than one use");

      return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
                         N->getOperand(0), Target);
    }
  }
  break;
  case ISD::BR_CC: {
    // If this is a branch on an altivec predicate comparison, lower this so
    // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
    // lowering is done pre-legalize, because the legalizer lowers the predicate
    // compare down to code that is difficult to reassemble.
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);

    // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
    // value. If so, pass-through the AND to get to the intrinsic.
    if (LHS.getOpcode() == ISD::AND &&
        LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero &&
        isa<ConstantSDNode>(LHS.getOperand(1)) &&
        !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()->
          isZero())
      LHS = LHS.getOperand(0);

    if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero &&
        isa<ConstantSDNode>(RHS)) {
      assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
             "Counter decrement comparison is not EQ or NE");

      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      bool isBDNZ = (CC == ISD::SETEQ && Val) ||
                    (CC == ISD::SETNE && !Val);

      // We now need to make the intrinsic dead (it cannot be instruction
      // selected).
      DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
      assert(LHS.getNode()->hasOneUse() &&
             "Counter decrement has more than one use");

      return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
                         N->getOperand(0), N->getOperand(4));
    }

    int CompareOpc;
    bool isDot;

    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
        getAltivecCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
      assert(isDot && "Can't compare against a vector result!");

      // If this is a comparison against something other than 0/1, then we know
      // that the condition is never/always true.
      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      if (Val != 0 && Val != 1) {
        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
          return N->getOperand(0);
        // Always !=, turn it into an unconditional branch.
        return DAG.getNode(ISD::BR, dl, MVT::Other,
                           N->getOperand(0), N->getOperand(4));
      }

      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);

      // Create the PPCISD altivec 'dot' comparison node.
      SDValue Ops[] = {
        LHS.getOperand(2),  // LHS of compare
        LHS.getOperand(3),  // RHS of compare
        DAG.getConstant(CompareOpc, dl, MVT::i32)
      };
      EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
      SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

      // Unpack the result based on how the target uses it.
      PPC::Predicate CompOpc;
      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
      default:  // Can't happen, don't crash on invalid number though.
      case 0:   // Branch on the value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
        break;
      case 1:   // Branch on the inverted value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
        break;
      case 2:   // Branch on the value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
        break;
      case 3:   // Branch on the inverted value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
        break;
      }

      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
                         DAG.getConstant(CompOpc, dl, MVT::i32),
                         DAG.getRegister(PPC::CR6, MVT::i32),
                         N->getOperand(4), CompNode.getValue(1));
    }
    break;
  }
  }

  return SDValue();
}

SDValue
PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                 SelectionDAG &DAG,
                                 std::vector<SDNode *> *Created) const {
  // fold (sdiv X, pow2)
  EVT VT = N->getValueType(0);
  if (VT == MVT::i64 && !Subtarget.isPPC64())
    return SDValue();
  if ((VT != MVT::i32 && VT != MVT::i64) ||
      !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);

  bool IsNegPow2 = (-Divisor).isPowerOf2();
  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
  SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
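
  // SRA_ADDZE is the classic PPC signed divide-by-power-of-two sequence:
  // srawi/sradi sets the carry bit when a negative dividend has one bits
  // shifted out, and the following addze adds that carry back in, rounding
  // the quotient toward zero as sdiv requires.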
  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
  if (Created)
    Created->push_back(Op.getNode());

  if (IsNegPow2) {
    Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
    if (Created)
      Created->push_back(Op.getNode());
  }

  return Op;
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      APInt &KnownZero,
                                                      APInt &KnownOne,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
      KnownZero = 0xFFFF0000;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpequd_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtsd_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
    case Intrinsic::ppc_altivec_vcmpgtud_p:
      KnownZero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}

unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8: {
    if (!ML)
      break;

    const PPCInstrInfo *TII = Subtarget.getInstrInfo();

    // For small loops (between 5 and 8 instructions), align to a 32-byte
    // boundary so that the entire loop fits in one instruction-cache line.
    uint64_t LoopSize = 0;
    for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
      for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J)
        LoopSize += TII->GetInstSizeInBytes(J);
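
    // The preferred alignment is returned in log2 form, so the value 5
    // below requests a 32-byte boundary.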
    if (LoopSize > 16 && LoopSize <= 32)
      return 5;

    break;
  }
  }

  return TargetLowering::getPrefLoopAlignment(ML);
}

/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    case 'Z':
      // FIXME: While Z does indicate a memory constraint, it specifically
      // indicates an r+r address (used in conjunction with the 'y' modifier
      // in the replacement string). Currently, we're forcing the base
      // register to be r0 in the asm printer (which is interpreted as zero)
      // and forming the complete address in the second register. This is
      // suboptimal.
      return C_Memory;
    }
  } else if (Constraint == "wc") { // individual CR bits.
    return C_RegisterClass;
  } else if (Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf" || Constraint == "ws") {
    return C_RegisterClass; // VSX registers.
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
PPCTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();

  // Look at the constraint type.
  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
    return CW_Register; // an individual CR bit.
  else if ((StringRef(constraint) == "wa" ||
            StringRef(constraint) == "wd" ||
            StringRef(constraint) == "wf") &&
           type->isVectorTy())
    return CW_Register;
  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
    return CW_Register;

  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
  case 'b':
    if (type->isIntegerTy())
      weight = CW_Register;
    break;
  case 'f':
    if (type->isFloatTy())
      weight = CW_Register;
    break;
  case 'd':
    if (type->isDoubleTy())
      weight = CW_Register;
    break;
  case 'v':
    if (type->isVectorTy())
      weight = CW_Register;
    break;
  case 'y':
    weight = CW_Register;
    break;
  case 'Z':
    weight = CW_Memory;
    break;
  }
  return weight;
}

std::pair<unsigned, const TargetRegisterClass *>
PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                const std::string &Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
    case 'r':   // R0-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RCRegClass);
      return std::make_pair(0U, &PPC::GPRCRegClass);
    case 'f':
      if (VT == MVT::f32 || VT == MVT::i32)
        return std::make_pair(0U, &PPC::F4RCRegClass);
      if (VT == MVT::f64 || VT == MVT::i64)
        return std::make_pair(0U, &PPC::F8RCRegClass);
      if (VT == MVT::v4f64 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QFRCRegClass);
      if (VT == MVT::v4f32 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QSRCRegClass);
      break;
    case 'v':
      if (VT == MVT::v4f64 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QFRCRegClass);
      if (VT == MVT::v4f32 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QSRCRegClass);
      return std::make_pair(0U, &PPC::VRRCRegClass);
    case 'y':   // crrc
      return std::make_pair(0U, &PPC::CRRCRegClass);
    }
  } else if (Constraint == "wc") { // an individual CR bit.
    return std::make_pair(0U, &PPC::CRBITRCRegClass);
  } else if (Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf") {
    return std::make_pair(0U, &PPC::VSRCRegClass);
  } else if (Constraint == "ws") {
    if (VT == MVT::f32)
      return std::make_pair(0U, &PPC::VSSRCRegClass);
    else
      return std::make_pair(0U, &PPC::VSFRCRegClass);
  }

  std::pair<unsigned, const TargetRegisterClass *> R =
    TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
  // register.
  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
      PPC::GPRCRegClass.contains(R.first))
    return std::make_pair(TRI->getMatchingSuperReg(R.first,
                            PPC::sub_32, &PPC::G8RCRegClass),
                          &PPC::G8RCRegClass);

  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
  if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
    R.first = PPC::CR0;
    R.second = &PPC::CRRCRegClass;
  }

  return R;
}

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     std::string &Constraint,
                                                     std::vector<SDValue>&Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;

  // Only support length 1 constraints.
  if (Constraint.length() > 1) return;

  char Letter = Constraint[0];
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
    if (!CST) return; // Must be an immediate to match.
    SDLoc dl(Op);
    int64_t Value = CST->getSExtValue();
    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
                         // numbers are printed as such.
    switch (Letter) {
    default: llvm_unreachable("Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
      if (isInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
      if (isShiftedUInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
      if (isShiftedInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      if (isUInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'M':  // "M" is a constant that is greater than 31.
      if (Value > 31)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      if (Value > 0 && isPowerOf2_64(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'O':  // "O" is the constant zero.
      if (Value == 0)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      if (isInt<16>(-Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    }
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              Type *Ty) const {
  // PPC does not allow r+i addressing modes for vectors!
  if (Ty->isVectorTy() && AM.BaseOffs != 0)
    return false;

  // PPC allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // PPC only supports r+r addressing:
  switch (AM.Scale) {
  case 0:  // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r or 2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default:
    // No other scales are supported.
    return false;
  }

  return true;
}

SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setLRStoreRequired();
  bool isPPC64 = Subtarget.isPPC64();

  if (Depth > 0) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset =
      DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
                      isPPC64 ? MVT::i64 : MVT::i32);
    return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                   FrameAddr, Offset),
                       MachinePointerInfo(), false, false, false, 0);
  }

  // Just load the return address off the stack.
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                     RetAddrFI, MachinePointerInfo(), false, false, false, 0);
}

SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setFrameAddressIsTaken(true);

  // Naked functions never have a frame pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned FrameReg;
  if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
    FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
  else
    FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;

  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
                                         PtrVT);
  while (Depth--)
    FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
                            FrameAddr, MachinePointerInfo(), false, false,
                            false, 0);
  return FrameAddr;
}

// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
unsigned PPCTargetLowering::getRegisterByName(const char* RegName,
                                              EVT VT) const {
  bool isPPC64 = Subtarget.isPPC64();
  bool isDarwinABI = Subtarget.isDarwinABI();

  if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
      (!isPPC64 && VT != MVT::i32))
    report_fatal_error("Invalid register global variable type");

  bool is64Bit = isPPC64 && VT == MVT::i64;
  unsigned Reg = StringSwitch<unsigned>(RegName)
                   .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
                   .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
                   .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
                                  (is64Bit ? PPC::X13 : PPC::R13))
                   .Default(0);

  if (Reg)
    return Reg;
  report_fatal_error("Invalid register name global variable");
}

bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The PowerPC target isn't yet aware of offsets.
  return false;
}

bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::ppc_qpx_qvlfd:
  case Intrinsic::ppc_qpx_qvlfs:
  case Intrinsic::ppc_qpx_qvlfcd:
  case Intrinsic::ppc_qpx_qvlfcs:
  case Intrinsic::ppc_qpx_qvlfiwa:
  case Intrinsic::ppc_qpx_qvlfiwz:
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
  case Intrinsic::ppc_altivec_lvebx:
  case Intrinsic::ppc_altivec_lvehx:
  case Intrinsic::ppc_altivec_lvewx:
  case Intrinsic::ppc_vsx_lxvd2x:
  case Intrinsic::ppc_vsx_lxvw4x: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfd:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfs:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcd:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcs:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
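
    // These loads implicitly truncate the address to a 16-byte (or wider)
    // boundary, so conservatively describe the access as the widest window
    // the instruction might touch: 2*size-1 bytes starting size-1 bytes
    // below the given pointer.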
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = 1;
    Info.vol = false;
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  case Intrinsic::ppc_qpx_qvlfda:
  case Intrinsic::ppc_qpx_qvlfsa:
  case Intrinsic::ppc_qpx_qvlfcda:
  case Intrinsic::ppc_qpx_qvlfcsa:
  case Intrinsic::ppc_qpx_qvlfiwaa:
  case Intrinsic::ppc_qpx_qvlfiwza: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_qpx_qvlfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcsa:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.size = VT.getStoreSize();
    Info.align = 1;
    Info.vol = false;
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  case Intrinsic::ppc_qpx_qvstfd:
  case Intrinsic::ppc_qpx_qvstfs:
  case Intrinsic::ppc_qpx_qvstfcd:
  case Intrinsic::ppc_qpx_qvstfcs:
  case Intrinsic::ppc_qpx_qvstfiw:
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
  case Intrinsic::ppc_altivec_stvebx:
  case Intrinsic::ppc_altivec_stvehx:
  case Intrinsic::ppc_altivec_stvewx:
  case Intrinsic::ppc_vsx_stxvd2x:
  case Intrinsic::ppc_vsx_stxvw4x: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfd:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfs:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcd:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcs:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = 1;
    Info.vol = false;
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  case Intrinsic::ppc_qpx_qvstfda:
  case Intrinsic::ppc_qpx_qvstfsa:
  case Intrinsic::ppc_qpx_qvstfcda:
  case Intrinsic::ppc_qpx_qvstfcsa:
  case Intrinsic::ppc_qpx_qvstfiwa: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_qpx_qvstfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcsa:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.size = VT.getStoreSize();
    Info.align = 1;
    Info.vol = false;
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  default:
    break;
  }

  return false;
}

/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero, the destination alignment can satisfy any
/// constraint. Similarly, if SrcAlign is zero there is no need to check it
/// against the alignment requirement, probably because the source does not
/// need to be loaded. If 'IsMemset' is true, that means it's expanding a
/// memset. If 'ZeroMemset' is true, that means it's a memset of zero.
/// 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
/// not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
                                           unsigned DstAlign, unsigned SrcAlign,
                                           bool IsMemset, bool ZeroMemset,
                                           bool MemcpyStrSrc,
                                           MachineFunction &MF) const {
  if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
    const Function *F = MF.getFunction();
    // When expanding a memset, require at least two QPX instructions to cover
    // the cost of loading the value to be stored from the constant pool.
    if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
        (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
        !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
      return MVT::v4f64;
    }

    // We should use Altivec/VSX loads and stores when available. For unaligned
    // addresses, unaligned VSX loads are only fast starting with the P8.
    if (Subtarget.hasAltivec() && Size >= 16 &&
        (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
         ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
      return MVT::v4i32;
  }

  if (Subtarget.isPPC64()) {
    return MVT::i64;
  }

  return MVT::i32;
}

/// \brief Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                          Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0 || BitSize > 64)
    return false;
  return true;
}

bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}

bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}

bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Generally speaking, zexts are not free, but they are free when they can be
  // folded with other operations.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  // FIXME: Add other cases...
  //  - 32-bit shifts with a zext to i64
  //  - zext after ctlz, bswap, etc.
  //  - zext after and by a constant mask

  return TargetLowering::isZExtFree(Val, VT2);
}

bool PPCTargetLowering::isFPExtFree(EVT VT) const {
  assert(VT.isFloatingPoint());
  return true;
}

bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);
}

bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);
}

bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                       unsigned,
                                                       unsigned,
                                                       bool *Fast) const {
  if (DisablePPCUnaligned)
    return false;

  // PowerPC supports unaligned memory access for simple non-vector types.
  // Although accessing unaligned addresses is not as efficient as accessing
  // aligned addresses, it is generally more efficient than manual expansion,
  // and generally only traps for software emulation when crossing page
  // boundaries.

  if (!VT.isSimple())
    return false;

  if (VT.getSimpleVT().isVector()) {
    if (Subtarget.hasVSX()) {
      if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
          VT != MVT::v4f32 && VT != MVT::v4i32)
        return false;
    } else {
      return false;
    }
  }

  if (VT == MVT::ppcf128)
    return false;

  if (Fast)
    *Fast = true;

  return true;
}

bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;
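
  // fmadd and fmsub are single instructions on PPC for both f32 and f64, so
  // a fused operation is never slower than a separate multiply and add.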
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  default:
    break;
  }

  return false;
}

const MCPhysReg *
PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
  // LR is a callee-save register, but we must treat it as clobbered by any call
  // site. Hence we include LR in the scratch registers, which are in turn added
  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
  // to CTR, which is used by any indirect call.
  static const MCPhysReg ScratchRegs[] = {
    PPC::X12, PPC::LR8, PPC::CTR8, 0
  };

  return ScratchRegs;
}

bool
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
                     EVT VT, unsigned DefinedValues) const {
  if (VT == MVT::v2i64)
    return false;

  if (Subtarget.hasQPX()) {
    if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1)
      return true;
  }

  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
}

Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
  if (DisableILPPref || Subtarget.enableMachineScheduler())
    return TargetLowering::getSchedulingPreference(N);

  return Sched::ILP;
}

// Create a fast isel object.
FastISel *
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) const {
  return PPC::createFastISel(FuncInfo, LibInfo);
}