//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//
#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCCallingConv.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// FIXME: Remove this once soft-float is supported.
static cl::opt<bool> DisablePPCFloatInVariadic("disable-ppc-float-in-variadic",
    cl::desc("disable saving float registers for va_start on PPC"),
    cl::Hidden);

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
    cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
    cl::desc("disable setting the node scheduling preference to ILP on PPC"),
    cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
    cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;
PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
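  // (These map onto the update forms of the PowerPC load/store instructions,
  // e.g. lwzu/stwu, which write the newly computed effective address back
  // into the base register.)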
  if (Subtarget.useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::UINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load/store of condition registers.
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }
  // This is used in the ppcf128->int sequence. Note it has different semantics
  // from FP_ROUND: that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions.
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow.
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FMA  , MVT::f64, Legal);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);
  setOperationAction(ISD::FMA  , MVT::f32, Legal);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  // If we're enabling GP optimizations, use hardware square root.
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }
  // PowerPC does not have BSWAP or CTTZ. CTPOP is handled just below, since
  // it is legal only when POPCNTD is available.
  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);

  if (Subtarget.hasPOPCNTD()) {
    setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
    setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
    setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
  }

  // PowerPC does not have ROTR.
  setOperationAction(ISD::ROTR, MVT::i32   , Expand);
  setOperationAction(ISD::ROTR, MVT::i64   , Expand);
  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select.
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }
  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit.
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND, which requires SetCC.
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP.
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i64, Expand);
  setOperationAction(ISD::BITCAST, MVT::f64, Expand);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  // NOTE: EH_SJLJ_SETJMP/_LONGJMP as supported here is NOT intended to support
  // SjLj exception handling, but rather to be a light-weight setjmp/longjmp
  // replacement used for continuations, user-level threading, and the like.
  // As a result, no other SjLj exception interfaces are implemented; please
  // don't build your own exception handling on top of them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  if (Subtarget.isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  if (Subtarget.isSVR4ABI() && !isPPC64)
    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  // Use the default implementation.
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
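  // (For example, SETUEQ is true when the operands are equal *or* unordered,
  // which needs both the EQ and UN bits of the CR field, so predicates like
  // these are expanded into two-condition sequences.)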
  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }
  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }
  if (Subtarget.hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);

      // Vector instructions introduced in P8.
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      } else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }
      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , VT, Promote);
      AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
      setOperationAction(ISD::OR    , VT, Promote);
      AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , VT, Promote);
      AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , VT, Promote);
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FPOWI, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL, VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT, VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::VSELECT, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
      for (MVT InnerVT : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }
    // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);
    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
    else
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);

    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not have unordered floating-point compare instructions.
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
      setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);

      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
        setOperationAction(ISD::SRL, MVT::v2i64, Legal);

        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
      } else {
        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
        setOperationAction(ISD::SRL, MVT::v2i64, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

        // VSX v2i64 only supports non-arithmetic operations.
        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
      }
      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);

      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

      // Vector operation legalization checks the result type of
      // SIGN_EXTEND_INREG, overall legalization checks the inner type.
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }
  }
  if (Subtarget.hasQPX()) {
    setOperationAction(ISD::FADD, MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
    setOperationAction(ISD::FREM, MVT::v4f64, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);

    setOperationAction(ISD::LOAD  , MVT::v4f64, Custom);
    setOperationAction(ISD::STORE , MVT::v4f64, Custom);

    setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f64, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f64, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f64, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f64, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f64, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f64, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f64, Expand);

    setOperationAction(ISD::FP_ROUND , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_ROUND_INREG , MVT::v4f32, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);

    setOperationAction(ISD::FNEG , MVT::v4f64, Legal);
    setOperationAction(ISD::FABS , MVT::v4f64, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f64, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOWI , MVT::v4f64, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f64, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);

    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FREM, MVT::v4f32, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);

    setOperationAction(ISD::LOAD  , MVT::v4f32, Custom);
    setOperationAction(ISD::STORE , MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4f32, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4f32, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4f32, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4f32, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    setOperationAction(ISD::FP_TO_SINT , MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT , MVT::v4f32, Expand);

    setOperationAction(ISD::FNEG , MVT::v4f32, Legal);
    setOperationAction(ISD::FABS , MVT::v4f32, Legal);
    setOperationAction(ISD::FSIN , MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOWI , MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2 , MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10 , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP , MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2 , MVT::v4f32, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);

    setOperationAction(ISD::AND , MVT::v4i1, Legal);
    setOperationAction(ISD::OR , MVT::v4i1, Legal);
    setOperationAction(ISD::XOR , MVT::v4i1, Legal);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);

    setOperationAction(ISD::LOAD  , MVT::v4i1, Custom);
    setOperationAction(ISD::STORE , MVT::v4i1, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT , MVT::v4i1, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT , MVT::v4i1, Expand);
    setOperationAction(ISD::CONCAT_VECTORS , MVT::v4i1, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR , MVT::v4i1, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE , MVT::v4i1, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);

    setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);

    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);

    setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);

    // These need to set FE_INEXACT, and so cannot be vectorized here.
    setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    } else {
      setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);

      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    }
  }
  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

  if (!isPPC64) {
    setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
  }
  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  }

  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }

  if (isPPC64) {
    setStackPointerRegisterToSaveRestore(PPC::X1);
    setExceptionPointerRegister(PPC::X3);
    setExceptionSelectorRegister(PPC::X4);
  } else {
    setStackPointerRegisterToSaveRestore(PPC::R1);
    setExceptionPointerRegister(PPC::R3);
    setExceptionSelectorRegister(PPC::R4);
  }
  // We have target-specific DAG combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  if (Subtarget.hasFPCVT())
    setTargetDAGCombine(ISD::UINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  if (Subtarget.useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_VOID);

  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  if (Subtarget.useCRBits()) {
    setTargetDAGCombine(ISD::TRUNCATE);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::SELECT_CC);
  }
  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }
  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget.isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }
  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
    setHasMultipleConditionRegisters();
    setJumpIsExpensive();
  }

  setMinFunctionAlignment(2);
  if (Subtarget.isDarwin())
    setPrefFunctionAlignment(4);
  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
    setPrefFunctionAlignment(4);
    setPrefLoopAlignment(4);
    break;
  }
  setInsertFencesForAtomic(true);

  if (Subtarget.enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);
  computeRegisterProperties(STI.getRegisterInfo());

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of a function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
  }
}
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
    for (const auto *EltTy : STy->elements()) {
      unsigned EltAlign = 0;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  // Darwin passes everything on a 4-byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16-byte and wider vectors are passed on a 16-byte boundary. Everything
  // else is aligned to 8 bytes on PPC64 and 4 bytes on PPC32.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
  return Align;
}
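// For example, on a non-Darwin PPC64 target a byval struct of scalars stays
// at 8-byte alignment, while one containing a 256-bit vector is raised to 32
// bytes under QPX (or to 16 bytes for a 128-bit vector under Altivec).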
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((PPCISD::NodeType)Opcode) {
  case PPCISD::FIRST_NUMBER:    break;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
  case PPCISD::FRE:             return "PPCISD::FRE";
  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::CMPB:            return "PPCISD::CMPB";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
  case PPCISD::MFVSR:           return "PPCISD::MFVSR";
  case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
  case PPCISD::ANDIo_1_EQ_BIT:  return "PPCISD::ANDIo_1_EQ_BIT";
  case PPCISD::ANDIo_1_GT_BIT:  return "PPCISD::ANDIo_1_GT_BIT";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
  case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
  case PPCISD::BDZ:             return "PPCISD::BDZ";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
  case PPCISD::SC:              return "PPCISD::SC";
  case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
  case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
  case PPCISD::RFEBB:           return "PPCISD::RFEBB";
  case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
  case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
  case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
  case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
  case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
  case PPCISD::QBFLT:           return "PPCISD::QBFLT";
  case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
  }
  return nullptr;
}
EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  if (Subtarget.hasQPX())
    return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());

  return VT.changeVectorElementTypeToInteger();
}
bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}
//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//
/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),   i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
        return false;
  }
  return true;
}
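// For example, with ShuffleKind 0 (big endian, two distinct inputs) the checks
// above accept exactly the mask
//   <1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31>
// (modulo undef elements): the odd-numbered bytes of the concatenated inputs,
// which is what vpkuhum produces when packing halfwords down to bytes.
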
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
        return false;
  }
  return true;
}
/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  const PPCSubtarget& Subtarget =
      static_cast<const PPCSubtarget&>(DAG.getSubtarget());
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  return true;
}
/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}
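// For example, a big-endian vmrglb with two distinct inputs (UnitSize 1,
// ShuffleKind 0) corresponds to isVMerge(N, 1, 8, 24), i.e. the mask
//   <8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31>,
// interleaving the low halves of the two inputs byte by byte.
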
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}
/**
 * \brief Common function used to match vmrgew and vmrgow shuffles
 *
 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target.
 *
 * - Little Endian:
 *   - Use offset of 0 to check for odd elements
 *   - Use offset of 4 to check for even elements
 * - Big Endian:
 *   - Use offset of 0 to check for even elements
 *   - Use offset of 4 to check for odd elements
 * A detailed description of the vector element ordering for little endian and
 * big endian can be found at
 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
 * Targeting your applications - what little endian and big endian IBM XL C/C++
 * compiler differences mean to you
 *
 * The mask to the shuffle vector instruction specifies the indices of the
 * elements from the two input vectors to place in the result. The elements are
 * numbered in array-access order, starting with the first vector. These
 * vectors are always of type v16i8, thus each vector contains 16 byte-sized
 * elements. More info on the shuffle vector can be found in the
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
 * Language Reference.
 *
 * The RHSStartValue indicates whether the same input vectors are used (unary)
 * or two different input vectors are used, based on the following:
 * - If the instruction uses the same vector for both inputs, the range of the
 *   indices will be 0 to 15. In this case, the RHSStart value passed should
 *   be 0.
 * - If the instruction has two different vectors then the range of the
 *   indices will be 0 to 31. In this case, the RHSStart value passed should
 *   be 16 (indices 0-15 specify elements in the first vector while indices 16
 *   to 31 specify elements in the second vector).
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] IndexOffset Specifies whether to look for even or odd elements
 * \param[in] RHSStartValue Specifies the starting index for the righthand input
 *   vector to the shuffle_vector instruction
 * \return true iff this shuffle vector represents an even or odd word merge
 */
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;

  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
                             i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
                             i*RHSStartValue+j+IndexOffset+8))
        return false;
  return true;
}
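// For example, isVMerge(N, 0, 16) -- an even-word merge of two distinct inputs
// on a big-endian target -- accepts the mask
//   <0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27>,
// i.e. words 0 and 2 of each input interleaved, as produced by vmrgew.
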
/**
 * \brief Determine if the specified shuffle mask is suitable for the vmrgew or
 * vmrgow instructions.
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
 * \param[in] ShuffleKind Identify the type of merge:
 *   - 0 = big-endian merge with two different inputs;
 *   - 1 = either-endian merge with two identical inputs;
 *   - 2 = little-endian merge with two different inputs (inputs are swapped for
 *     little-endian merges).
 * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask matches an even or odd word merge
 */
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                              unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    unsigned indexOffset = CheckEven ? 4 : 0;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  } else {
    unsigned indexOffset = CheckEven ? 0 : 4;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 0) // Normal
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
}
1379 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1380 /// amount, otherwise return -1.
1381 /// The ShuffleKind distinguishes between big-endian operations with two
1382 /// different inputs (0), either-endian operations with two identical inputs
1383 /// (1), and little-endian operations with two different inputs (2). For the
1384 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
1385 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1386 SelectionDAG &DAG) {
1387 if (N->getValueType(0) != MVT::v16i8)
1390 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1392 // Find the first non-undef value in the shuffle mask.
1394 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1397 if (i == 16) return -1; // all undef.
1399 // Otherwise, check to see if the rest of the elements are consecutively
1400 // numbered from this value.
1401 unsigned ShiftAmt = SVOp->getMaskElt(i);
1402 if (ShiftAmt < i) return -1;
1405 bool isLE = DAG.getDataLayout().isLittleEndian();
1407 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
1408 // Check the rest of the elements to see if they are consecutive.
1409 for (++i; i != 16; ++i)
1410 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1411 return -1;
1412 } else if (ShuffleKind == 1) {
1413 // Check the rest of the elements to see if they are consecutive.
1414 for (++i; i != 16; ++i)
1415 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1416 return -1;
1417 } else
1418 return -1;
1420 if (isLE)
1421 ShiftAmt = 16 - ShiftAmt;
1423 return ShiftAmt;
1424 }
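// Worked example (illustrative): the v16i8 mask {3,4,5,...,18} is a
// consecutive run starting at 3, so on a big-endian target (ShuffleKind 0)
// this returns a shift amount of 3 (vsldoi VA, VB, 3). On a little-endian
// target the amount is flipped to 16 - 3 = 13, because the inputs to the
// underlying instruction are swapped there.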
1426 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1427 /// specifies a splat of a single element that is suitable for input to
1428 /// VSPLTB/VSPLTH/VSPLTW.
1429 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1430 assert(N->getValueType(0) == MVT::v16i8 &&
1431 (EltSize == 1 || EltSize == 2 || EltSize == 4));
1433 // This is a splat operation if each element of the permute is the same, and
1434 // if the value doesn't reference the second vector.
1435 unsigned ElementBase = N->getMaskElt(0);
1437 // FIXME: Handle UNDEF elements too!
1438 if (ElementBase >= 16)
1439 return false;
1441 // Check that the indices are consecutive, in the case of a multi-byte element
1442 // splatted with a v16i8 mask.
1443 for (unsigned i = 1; i != EltSize; ++i)
1444 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
1445 return false;
1447 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
1448 if (N->getMaskElt(i) < 0) continue;
1449 for (unsigned j = 0; j != EltSize; ++j)
1450 if (N->getMaskElt(i+j) != N->getMaskElt(j))
1451 return false;
1452 }
1454 return true;
1455 }
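// Worked example (illustrative): with EltSize == 4, the mask
//   {4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7}
// is accepted: bytes 4-7 are consecutive and every later 4-byte group
// repeats them, so this is a splat of word element 1 suitable for vspltw.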
1456 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
1457 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
1458 unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
1459 SelectionDAG &DAG) {
1460 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1461 assert(isSplatShuffleMask(SVOp, EltSize));
1462 if (DAG.getDataLayout().isLittleEndian())
1463 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
1464 else
1465 return SVOp->getMaskElt(0) / EltSize;
1466 }
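// Worked example (illustrative): for the word-splat mask above (EltSize == 4,
// first mask element 4), big-endian returns 4/4 == 1, while little-endian
// returns (16/4) - 1 - 1 == 2, accounting for the reversed element numbering
// that the VSPLT* instructions see on LE targets.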
1468 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
1469 /// by using a vspltis[bhw] instruction of the specified element size, return
1470 /// the constant being splatted. The ByteSize field indicates the number of
1471 /// bytes of each element [124] -> [bhw].
1472 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
1473 SDValue OpVal(nullptr, 0);
1475 // If ByteSize of the splat is bigger than the element size of the
1476 // build_vector, then we have a case where we are checking for a splat where
1477 // multiple elements of the buildvector are folded together into a single
1478 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
1479 unsigned EltSize = 16/N->getNumOperands();
1480 if (EltSize < ByteSize) {
1481 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
1482 SDValue UniquedVals[4];
1483 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
1485 // See if all of the elements in the buildvector agree across.
1486 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1487 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1488 // If the element isn't a constant, bail fully out.
1489 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
1492 if (!UniquedVals[i&(Multiple-1)].getNode())
1493 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
1494 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
1495 return SDValue(); // no match.
1496 }
1498 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
1499 // either constant or undef values that are identical for each chunk. See
1500 // if these chunks can form into a larger vspltis*.
1502 // Check to see if all of the leading entries are either 0 or -1. If
1503 // neither, then this won't fit into the immediate field.
1504 bool LeadingZero = true;
1505 bool LeadingOnes = true;
1506 for (unsigned i = 0; i != Multiple-1; ++i) {
1507 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
1509 LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
1510 LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
1511 }
1512 // Finally, check the least significant entry.
1513 if (LeadingZero) {
1514 if (!UniquedVals[Multiple-1].getNode())
1515 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
1516 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
1517 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
1518 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
1519 }
1520 if (LeadingOnes) {
1521 if (!UniquedVals[Multiple-1].getNode())
1522 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
1523 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
1524 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
1525 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
1526 }
1528 return SDValue();
1529 }
1531 // Check to see if this buildvec has a single non-undef value in its elements.
1532 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1533 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1534 if (!OpVal.getNode())
1535 OpVal = N->getOperand(i);
1536 else if (OpVal != N->getOperand(i))
1537 return SDValue(); // no match.
1538 }
1540 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
1542 unsigned ValSizeInBytes = EltSize;
1543 uint64_t Value = 0;
1544 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1545 Value = CN->getZExtValue();
1546 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1547 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
1548 Value = FloatToBits(CN->getValueAPF().convertToFloat());
1549 }
1551 // If the splat value is larger than the element value, then we can never do
1552 // this splat. The only case that we could fit the replicated bits into our
1553 // immediate field for would be zero, and we prefer to use vxor for it.
1554 if (ValSizeInBytes < ByteSize) return SDValue();
1556 // If the element value is larger than the splat value, check if it consists
1557 // of a repeated bit pattern of size ByteSize.
1558 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
1559 return SDValue();
1561 // Properly sign extend the value.
1562 int MaskVal = SignExtend32(Value, ByteSize * 8);
1564 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
1565 if (MaskVal == 0) return SDValue();
1567 // Finally, if this value fits in a 5 bit sext field, return it
1568 if (SignExtend32<5>(MaskVal) == MaskVal)
1569 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
1571 return SDValue();
1572 }
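// Worked example (illustrative): a v4i32 build_vector whose elements are all
// 0xFFFFFFFE, queried with ByteSize == 4, yields MaskVal ==
// SignExtend32(0xFFFFFFFE, 32) == -2, which fits in the 5-bit signed
// immediate field, so this returns -2 and the vector can be materialized
// with a single vspltisw -2.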
1573 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
1574 /// amount, otherwise return -1.
1575 int PPC::isQVALIGNIShuffleMask(SDNode *N) {
1576 EVT VT = N->getValueType(0);
1577 if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
1578 return -1;
1580 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1582 // Find the first non-undef value in the shuffle mask.
1583 unsigned i;
1584 for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
1585 /*search*/;
1587 if (i == 4) return -1; // all undef.
1589 // Otherwise, check to see if the rest of the elements are consecutively
1590 // numbered from this value.
1591 unsigned ShiftAmt = SVOp->getMaskElt(i);
1592 if (ShiftAmt < i) return -1;
1593 ShiftAmt -= i;
1595 // Check the rest of the elements to see if they are consecutive.
1596 for (++i; i != 4; ++i)
1597 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1598 return -1;
1600 return ShiftAmt;
1601 }
1603 //===----------------------------------------------------------------------===//
1604 // Addressing Mode Selection
1605 //===----------------------------------------------------------------------===//
1607 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
1608 /// or 64-bit immediate, and if the value can be accurately represented as a
1609 /// sign extension from a 16-bit value. If so, this returns true and the
1610 /// immediate is returned in Imm.
1611 static bool isIntS16Immediate(SDNode *N, short &Imm) {
1612 if (!isa<ConstantSDNode>(N))
1613 return false;
1615 Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
1616 if (N->getValueType(0) == MVT::i32)
1617 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
1618 else
1619 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
1620 }
1621 static bool isIntS16Immediate(SDValue Op, short &Imm) {
1622 return isIntS16Immediate(Op.getNode(), Imm);
1623 }
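// Worked example (illustrative): an i32 constant 0xFFFF8000 truncates to
// Imm == -32768 and sign-extends back to the same value, so it is accepted;
// 0x00008000 also truncates to -32768 but does not round-trip, so it is
// rejected and cannot be used as a 16-bit displacement.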
1626 /// SelectAddressRegReg - Given the specified address, check to see if it
1627 /// can be represented as an indexed [r+r] operation. Returns false if it
1628 /// can be more efficiently represented with [r+imm].
1629 bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
1630 SDValue &Index,
1631 SelectionDAG &DAG) const {
1632 short imm = 0;
1633 if (N.getOpcode() == ISD::ADD) {
1634 if (isIntS16Immediate(N.getOperand(1), imm))
1635 return false; // r+i
1636 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
1637 return false; // r+i
1639 Base = N.getOperand(0);
1640 Index = N.getOperand(1);
1641 return true; // [r+r]
1642 } else if (N.getOpcode() == ISD::OR) {
1643 if (isIntS16Immediate(N.getOperand(1), imm))
1644 return false; // r+i can fold it if we can.
1646 // If this is an or of disjoint bitfields, we can codegen this as an add
1647 // (for better address arithmetic) if the LHS and RHS of the OR are provably
1648 // disjoint.
1649 APInt LHSKnownZero, LHSKnownOne;
1650 APInt RHSKnownZero, RHSKnownOne;
1651 DAG.computeKnownBits(N.getOperand(0),
1652 LHSKnownZero, LHSKnownOne);
1654 if (LHSKnownZero.getBoolValue()) {
1655 DAG.computeKnownBits(N.getOperand(1),
1656 RHSKnownZero, RHSKnownOne);
1657 // If all of the bits are known zero on the LHS or RHS, the add won't
1658 // change the result of the addition.
1659 if (~(LHSKnownZero | RHSKnownZero) == 0) {
1660 Base = N.getOperand(0);
1661 Index = N.getOperand(1);
1662 return true; // [r+r]
1663 }
1664 }
1665 }
1667 return false;
1668 }
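// Illustrative case: for (or (shl %x, 16), %y), where the shifted LHS has its
// low 16 bits known zero and %y is known to have its upper bits zero, the OR
// is equivalent to an ADD, so the node can be selected as an indexed [r+r]
// access (e.g. lwzx) instead of materializing the OR separately. The
// computeKnownBits check above is what proves the two halves disjoint.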
1670 // If we happen to be doing an i64 load or store into a stack slot that has
1671 // less than a 4-byte alignment, then the frame-index elimination may need to
1672 // use an indexed load or store instruction (because the offset may not be a
1673 // multiple of 4). The extra register needed to hold the offset comes from the
1674 // register scavenger, and it is possible that the scavenger will need to use
1675 // an emergency spill slot. As a result, we need to make sure that a spill slot
1676 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
1677 // stack slot.
1678 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
1679 // FIXME: This does not handle the LWA case.
1680 if (VT != MVT::i64)
1681 return;
1683 // NOTE: We'll exclude negative FIs here, which come from argument
1684 // lowering, because there are no known test cases triggering this problem
1685 // using packed structures (or similar). We can remove this exclusion if
1686 // we find such a test case. The reason why this is so test-case driven is
1687 // because this entire 'fixup' is only to prevent crashes (from the
1688 // register scavenger) on not-really-valid inputs. For example, if we have:
1689 // %a = alloca i1
1690 // %b = bitcast i1* %a to i64*
1691 // store i64* a, i64 b
1692 // then the store should really be marked as 'align 1', but is not. If it
1693 // were marked as 'align 1' then the indexed form would have been
1694 // instruction-selected initially, and the problem this 'fixup' is preventing
1695 // won't happen regardless.
1697 if (FrameIdx < 0)
1698 return;
1699 MachineFunction &MF = DAG.getMachineFunction();
1700 MachineFrameInfo *MFI = MF.getFrameInfo();
1702 unsigned Align = MFI->getObjectAlignment(FrameIdx);
1703 if (Align >= 4)
1704 return;
1706 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1707 FuncInfo->setHasNonRISpills();
1708 }
1710 /// Returns true if the address N can be represented by a base register plus
1711 /// a signed 16-bit displacement [r+imm], and if it is not better
1712 /// represented as reg+reg. If Aligned is true, only accept displacements
1713 /// suitable for STD and friends, i.e. multiples of 4.
1714 bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
1715 SDValue &Base,
1716 SelectionDAG &DAG,
1717 bool Aligned) const {
1718 // FIXME dl should come from parent load or store, not from address
1719 SDLoc dl(N);
1720 // If this can be more profitably realized as r+r, fail.
1721 if (SelectAddressRegReg(N, Disp, Base, DAG))
1722 return false;
1724 if (N.getOpcode() == ISD::ADD) {
1725 short imm = 0;
1726 if (isIntS16Immediate(N.getOperand(1), imm) &&
1727 (!Aligned || (imm & 3) == 0)) {
1728 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
1729 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
1730 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1731 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
1732 } else {
1733 Base = N.getOperand(0);
1734 }
1735 return true; // [r+i]
1736 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
1737 // Match LOAD (ADD (X, Lo(G))).
1738 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
1739 && "Cannot handle constant offsets yet!");
1740 Disp = N.getOperand(1).getOperand(0); // The global address.
1741 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
1742 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
1743 Disp.getOpcode() == ISD::TargetConstantPool ||
1744 Disp.getOpcode() == ISD::TargetJumpTable);
1745 Base = N.getOperand(0);
1746 return true; // [&g+r]
1747 }
1748 } else if (N.getOpcode() == ISD::OR) {
1749 short imm = 0;
1750 if (isIntS16Immediate(N.getOperand(1), imm) &&
1751 (!Aligned || (imm & 3) == 0)) {
1752 // If this is an or of disjoint bitfields, we can codegen this as an add
1753 // (for better address arithmetic) if the LHS and RHS of the OR are
1754 // provably disjoint.
1755 APInt LHSKnownZero, LHSKnownOne;
1756 DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
1758 if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
1759 // If all of the bits are known zero on the LHS or RHS, the add won't
1760 // change the result of the addition.
1761 if (FrameIndexSDNode *FI =
1762 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
1763 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1764 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
1765 } else {
1766 Base = N.getOperand(0);
1767 }
1768 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
1769 return true; // [r+i]
1770 }
1771 }
1772 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
1773 // Loading from a constant address.
1775 // If this address fits entirely in a 16-bit sext immediate field, codegen
1776 // this as [r+0].
1777 short Imm;
1778 if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
1779 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
1780 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
1781 CN->getValueType(0));
1782 return true;
1783 }
1785 // Handle 32-bit sext immediates with LIS + addr mode.
1786 if ((CN->getValueType(0) == MVT::i32 ||
1787 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
1788 (!Aligned || (CN->getZExtValue() & 3) == 0)) {
1789 int Addr = (int)CN->getZExtValue();
1791 // Otherwise, break this down into an LIS + disp.
1792 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
1794 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
1795 MVT::i32);
1796 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
1797 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
1798 return true;
1799 }
1800 }
1802 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
1803 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
1804 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
1805 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
1806 } else
1807 Base = N;
1808 return true; // [r+0]
1809 }
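// Worked example (illustrative): for the constant address 0x12348000, the low
// 16 bits give Disp == -32768, and Base becomes
// (0x12348000 - (-32768)) >> 16 == 0x1235, materialized with lis. The access
// then computes 0x12350000 + (-32768) == 0x12348000, showing how the
// high-part adjustment compensates for the sign-extended low part.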
1811 /// SelectAddressRegRegOnly - Given the specified address, force it to be
1812 /// represented as an indexed [r+r] operation.
1813 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
1814 SDValue &Index,
1815 SelectionDAG &DAG) const {
1816 // Check to see if we can easily represent this as an [r+r] address. This
1817 // will fail if it thinks that the address is more profitably represented as
1818 // reg+imm, e.g. where imm = 0.
1819 if (SelectAddressRegReg(N, Base, Index, DAG))
1820 return true;
1822 // If the operand is an addition, always emit this as [r+r], since this is
1823 // better (for code size, and execution, as the memop does the add for free)
1824 // than emitting an explicit add.
1825 if (N.getOpcode() == ISD::ADD) {
1826 Base = N.getOperand(0);
1827 Index = N.getOperand(1);
1828 return true;
1829 }
1831 // Otherwise, do it the hard way, using R0 as the base register.
1832 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
1833 N.getValueType());
1834 Index = N;
1835 return true;
1836 }
1838 /// getPreIndexedAddressParts - returns true by value, base pointer and
1839 /// offset pointer and addressing mode by reference if the node's address
1840 /// can be legally represented as pre-indexed load / store address.
1841 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
1842 SDValue &Offset,
1843 ISD::MemIndexedMode &AM,
1844 SelectionDAG &DAG) const {
1845 if (DisablePPCPreinc) return false;
1847 bool isLoad = true;
1848 SDValue Ptr;
1849 EVT VT;
1850 unsigned Alignment;
1851 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1852 Ptr = LD->getBasePtr();
1853 VT = LD->getMemoryVT();
1854 Alignment = LD->getAlignment();
1855 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
1856 Ptr = ST->getBasePtr();
1857 VT = ST->getMemoryVT();
1858 Alignment = ST->getAlignment();
1859 isLoad = false;
1860 } else
1861 return false;
1863 // PowerPC doesn't have preinc load/store instructions for vectors (except
1864 // for QPX, which does have preinc r+r forms).
1865 if (VT.isVector()) {
1866 if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
1867 return false;
1868 } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
1869 AM = ISD::PRE_INC;
1870 return true;
1871 }
1872 }
1874 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
1876 // Common code will reject creating a pre-inc form if the base pointer
1877 // is a frame index, or if N is a store and the base pointer is either
1878 // the same as or a predecessor of the value being stored. Check for
1879 // those situations here, and try with swapped Base/Offset instead.
1881 bool Swap = false;
1882 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
1883 Swap = true;
1884 else if (!isLoad) {
1885 SDValue Val = cast<StoreSDNode>(N)->getValue();
1886 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
1887 Swap = true;
1888 }
1890 if (Swap)
1891 std::swap(Base, Offset);
1893 AM = ISD::PRE_INC;
1894 return true;
1895 }
1897 // LDU/STU can only handle immediates that are a multiple of 4.
1898 if (VT != MVT::i64) {
1899 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
1900 return false;
1901 } else {
1902 // LDU/STU need an address with at least 4-byte alignment.
1903 if (Alignment < 4)
1904 return false;
1906 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
1907 return false;
1908 }
1910 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
1911 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
1912 // sext i32 to i64 when addr mode is r+i.
1913 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
1914 LD->getExtensionType() == ISD::SEXTLOAD &&
1915 isa<ConstantSDNode>(Offset))
1916 return false;
1917 }
1919 AM = ISD::PRE_INC;
1920 return true;
1921 }
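// Illustrative effect: a load such as (load (add %r, 16)) can become a
// pre-increment lwzu/ldu, which writes %r + 16 back into the base register
// while performing the access, saving the separate addi that would otherwise
// update the pointer in a loop.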
1923 //===----------------------------------------------------------------------===//
1924 // LowerOperation implementation
1925 //===----------------------------------------------------------------------===//
1927 /// GetLabelAccessInfo - Return true if we should reference labels using a
1928 /// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
1929 static bool GetLabelAccessInfo(const TargetMachine &TM,
1930 const PPCSubtarget &Subtarget,
1931 unsigned &HiOpFlags, unsigned &LoOpFlags,
1932 const GlobalValue *GV = nullptr) {
1933 HiOpFlags = PPCII::MO_HA;
1934 LoOpFlags = PPCII::MO_LO;
1936 // Don't use the pic base if not in PIC relocation model.
1937 bool isPIC = TM.getRelocationModel() == Reloc::PIC_;
1939 if (isPIC) {
1940 HiOpFlags |= PPCII::MO_PIC_FLAG;
1941 LoOpFlags |= PPCII::MO_PIC_FLAG;
1942 }
1944 // If this is a reference to a global value that requires a non-lazy-ptr, make
1945 // sure that instruction lowering adds it.
1946 if (GV && Subtarget.hasLazyResolverStub(GV)) {
1947 HiOpFlags |= PPCII::MO_NLP_FLAG;
1948 LoOpFlags |= PPCII::MO_NLP_FLAG;
1950 if (GV->hasHiddenVisibility()) {
1951 HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
1952 LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
1953 }
1954 }
1956 return isPIC;
1957 }
1959 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
1960 SelectionDAG &DAG) {
1961 SDLoc DL(HiPart);
1962 EVT PtrVT = HiPart.getValueType();
1963 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
1965 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
1966 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
1968 // With PIC, the first instruction is actually "GR+hi(&G)".
1969 if (isPIC)
1970 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
1971 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
1973 // Generate non-pic code that has direct accesses to the constant pool.
1974 // The address of the global is just (hi(&g)+lo(&g)).
1975 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
1976 }
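// Illustrative lowering (sketch): a non-PIC reference to a global G becomes
//   lis  rT, G@ha
//   addi rD, rT, G@l
// where the PPCISD::Hi/Lo nodes above correspond to the @ha/@l relocations;
// in PIC mode the GlobalBaseReg is added into the high part first.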
1978 static void setUsesTOCBasePtr(MachineFunction &MF) {
1979 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
1980 FuncInfo->setUsesTOCBasePtr();
1981 }
1983 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
1984 setUsesTOCBasePtr(DAG.getMachineFunction());
1985 }
1987 static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit,
1988 SDValue GA) {
1989 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
1990 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
1991 DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
1993 SDValue Ops[] = { GA, Reg };
1994 return DAG.getMemIntrinsicNode(PPCISD::TOC_ENTRY, dl,
1995 DAG.getVTList(VT, MVT::Other), Ops, VT,
1996 MachinePointerInfo::getGOT(), 0, false, true,
1997 false, 0);
1998 }
2000 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2001 SelectionDAG &DAG) const {
2002 EVT PtrVT = Op.getValueType();
2003 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2004 const Constant *C = CP->getConstVal();
2006 // 64-bit SVR4 ABI code is always position-independent.
2007 // The actual address of the GlobalValue is stored in the TOC.
2008 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2009 setUsesTOCBasePtr(DAG);
2010 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
2011 return getTOCEntry(DAG, SDLoc(CP), true, GA);
2012 }
2014 unsigned MOHiFlag, MOLoFlag;
2015 bool isPIC =
2016 GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
2018 if (isPIC && Subtarget.isSVR4ABI()) {
2019 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
2020 PPCII::MO_PIC_FLAG);
2021 return getTOCEntry(DAG, SDLoc(CP), false, GA);
2022 }
2024 SDValue CPIHi =
2025 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
2026 SDValue CPILo =
2027 DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
2028 return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
2029 }
2031 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
2032 EVT PtrVT = Op.getValueType();
2033 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2035 // 64-bit SVR4 ABI code is always position-independent.
2036 // The actual address of the GlobalValue is stored in the TOC.
2037 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2038 setUsesTOCBasePtr(DAG);
2039 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
2040 return getTOCEntry(DAG, SDLoc(JT), true, GA);
2041 }
2043 unsigned MOHiFlag, MOLoFlag;
2044 bool isPIC =
2045 GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
2047 if (isPIC && Subtarget.isSVR4ABI()) {
2048 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
2049 PPCII::MO_PIC_FLAG);
2050 return getTOCEntry(DAG, SDLoc(GA), false, GA);
2051 }
2053 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
2054 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
2055 return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
2056 }
2058 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
2059 SelectionDAG &DAG) const {
2060 EVT PtrVT = Op.getValueType();
2061 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
2062 const BlockAddress *BA = BASDN->getBlockAddress();
2064 // 64-bit SVR4 ABI code is always position-independent.
2065 // The actual BlockAddress is stored in the TOC.
2066 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2067 setUsesTOCBasePtr(DAG);
2068 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
2069 return getTOCEntry(DAG, SDLoc(BASDN), true, GA);
2070 }
2072 unsigned MOHiFlag, MOLoFlag;
2073 bool isPIC =
2074 GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag);
2075 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
2076 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
2077 return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
2078 }
2080 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2081 SelectionDAG &DAG) const {
2083 // FIXME: TLS addresses currently use medium model code sequences,
2084 // which is the most useful form. Eventually support for small and
2085 // large models could be added if users need it, at the cost of
2086 // additional complexity.
2087 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2088 SDLoc dl(GA);
2089 const GlobalValue *GV = GA->getGlobal();
2090 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2091 bool is64bit = Subtarget.isPPC64();
2092 const Module *M = DAG.getMachineFunction().getFunction()->getParent();
2093 PICLevel::Level picLevel = M->getPICLevel();
2095 TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
2097 if (Model == TLSModel::LocalExec) {
2098 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2099 PPCII::MO_TPREL_HA);
2100 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2101 PPCII::MO_TPREL_LO);
2102 SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
2103 is64bit ? MVT::i64 : MVT::i32);
2104 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
2105 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
2106 }
2108 if (Model == TLSModel::InitialExec) {
2109 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2110 SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
2111 PPCII::MO_TLS);
2112 SDValue GOTPtr;
2113 if (is64bit) {
2114 setUsesTOCBasePtr(DAG);
2115 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2116 GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
2117 PtrVT, GOTReg, TGA);
2118 } else
2119 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
2120 SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
2121 PtrVT, TGA, GOTPtr);
2122 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
2123 }
2125 if (Model == TLSModel::GeneralDynamic) {
2126 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2127 SDValue GOTPtr;
2128 if (is64bit) {
2129 setUsesTOCBasePtr(DAG);
2130 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2131 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
2132 GOTReg, TGA);
2133 } else {
2134 if (picLevel == PICLevel::Small)
2135 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2137 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2138 }
2139 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
2140 GOTPtr, TGA, TGA);
2141 }
2143 if (Model == TLSModel::LocalDynamic) {
2144 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
2145 SDValue GOTPtr;
2146 if (is64bit) {
2147 setUsesTOCBasePtr(DAG);
2148 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
2149 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
2150 GOTReg, TGA);
2151 } else {
2152 if (picLevel == PICLevel::Small)
2153 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
2155 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
2156 }
2157 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
2158 PtrVT, GOTPtr, TGA, TGA);
2159 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
2160 PtrVT, TLSAddr, TGA);
2161 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
2162 }
2164 llvm_unreachable("Unknown TLS model!");
2165 }
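// Illustrative local-exec sequence on 64-bit targets (sketch):
//   addis rT, r13, x@tprel@ha
//   addi  rD, rT, x@tprel@l
// r13 is the thread pointer, matching the Hi/Lo nodes built above; the other
// TLS models instead go through the GOT/TOC using the ADDIS/LD/ADDI node
// pairs seen in each branch.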
2167 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
2168 SelectionDAG &DAG) const {
2169 EVT PtrVT = Op.getValueType();
2170 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
2171 SDLoc DL(GSDN);
2172 const GlobalValue *GV = GSDN->getGlobal();
2174 // 64-bit SVR4 ABI code is always position-independent.
2175 // The actual address of the GlobalValue is stored in the TOC.
2176 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
2177 setUsesTOCBasePtr(DAG);
2178 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
2179 return getTOCEntry(DAG, DL, true, GA);
2180 }
2182 unsigned MOHiFlag, MOLoFlag;
2183 bool isPIC =
2184 GetLabelAccessInfo(DAG.getTarget(), Subtarget, MOHiFlag, MOLoFlag, GV);
2186 if (isPIC && Subtarget.isSVR4ABI()) {
2187 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
2188 GSDN->getOffset(),
2189 PPCII::MO_PIC_FLAG);
2190 return getTOCEntry(DAG, DL, false, GA);
2191 }
2193 SDValue GAHi =
2194 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
2195 SDValue GALo =
2196 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
2198 SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);
2200 // If the global reference is actually to a non-lazy-pointer, we have to do an
2201 // extra load to get the address of the global.
2202 if (MOHiFlag & PPCII::MO_NLP_FLAG)
2203 Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
2204 false, false, false, 0);
2206 return Ptr;
2207 }
2208 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
2209 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2210 SDLoc dl(Op);
2212 if (Op.getValueType() == MVT::v2i64) {
2213 // When the operands themselves are v2i64 values, we need to do something
2214 // special because VSX has no underlying comparison operations for these.
2215 if (Op.getOperand(0).getValueType() == MVT::v2i64) {
2216 // Equality can be handled by casting to the legal type for Altivec
2217 // comparisons, everything else needs to be expanded.
2218 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
2219 return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
2220 DAG.getSetCC(dl, MVT::v4i32,
2221 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
2222 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
2223 CC));
2224 }
2226 return SDValue();
2227 }
2229 // We handle most of these in the usual way.
2230 return Op;
2231 }
2233 // If we're comparing for equality to zero, expose the fact that this is
2234 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
2235 // fold the new nodes.
2236 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2237 if (C->isNullValue() && CC == ISD::SETEQ) {
2238 EVT VT = Op.getOperand(0).getValueType();
2239 SDValue Zext = Op.getOperand(0);
2240 if (VT.bitsLT(MVT::i32)) {
2241 VT = MVT::i32;
2242 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
2243 }
2244 unsigned Log2b = Log2_32(VT.getSizeInBits());
2245 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
2246 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
2247 DAG.getConstant(Log2b, dl, MVT::i32));
2248 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
2249 }
2250 // Leave comparisons against 0 and -1 alone for now, since they're usually
2251 // optimized. FIXME: revisit this when we can custom lower all setcc
2252 // optimizations.
2253 if (C->isAllOnesValue() || C->isNullValue())
2254 return SDValue();
2255 }
2257 // If we have an integer seteq/setne, turn it into a compare against zero
2258 // by xor'ing the rhs with the lhs, which is faster than setting a
2259 // condition register, reading it back out, and masking the correct bit. The
2260 // normal approach here uses sub to do this instead of xor. Using xor exposes
2261 // the result to other bit-twiddling opportunities.
2262 EVT LHSVT = Op.getOperand(0).getValueType();
2263 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2264 EVT VT = Op.getValueType();
2265 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
2266 Op.getOperand(1));
2267 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
2268 }
2270 return SDValue();
2271 }
2272 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
2273 const PPCSubtarget &Subtarget) const {
2274 SDNode *Node = Op.getNode();
2275 EVT VT = Node->getValueType(0);
2276 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2277 SDValue InChain = Node->getOperand(0);
2278 SDValue VAListPtr = Node->getOperand(1);
2279 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
2280 SDLoc dl(Node);
2282 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
2285 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2286 VAListPtr, MachinePointerInfo(SV), MVT::i8,
2287 false, false, false, 0);
2288 InChain = GprIndex.getValue(1);
2290 if (VT == MVT::i64) {
2291 // Check if GprIndex is even
2292 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
2293 DAG.getConstant(1, dl, MVT::i32));
2294 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
2295 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
2296 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
2297 DAG.getConstant(1, dl, MVT::i32));
2298 // Align GprIndex to be even if it isn't
2299 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
2300 GprIndex);
2301 }
2303 // fpr index is 1 byte after gpr
2304 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2305 DAG.getConstant(1, dl, MVT::i32));
2308 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
2309 FprPtr, MachinePointerInfo(SV), MVT::i8,
2310 false, false, false, 0);
2311 InChain = FprIndex.getValue(1);
2313 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2314 DAG.getConstant(8, dl, MVT::i32));
2316 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
2317 DAG.getConstant(4, dl, MVT::i32));
2320 SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
2321 MachinePointerInfo(), false, false,
2322 false, 0);
2323 InChain = OverflowArea.getValue(1);
2325 SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
2326 MachinePointerInfo(), false, false,
2327 false, 0);
2328 InChain = RegSaveArea.getValue(1);
2330 // select overflow_area if index > 8
2331 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
2332 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
2334 // adjustment constant gpr_index * 4/8
2335 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
2336 VT.isInteger() ? GprIndex : FprIndex,
2337 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
2338 MVT::i32));
2340 // OurReg = RegSaveArea + RegConstant
2341 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
2342 RegConstant);
2344 // Floating types are 32 bytes into RegSaveArea
2345 if (VT.isFloatingPoint())
2346 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
2347 DAG.getConstant(32, dl, MVT::i32));
2349 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
2350 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
2351 VT.isInteger() ? GprIndex : FprIndex,
2352 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
2353 MVT::i32));
2355 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
2356 VT.isInteger() ? VAListPtr : FprPtr,
2357 MachinePointerInfo(SV),
2358 MVT::i8, false, false, 0);
2360 // determine if we should load from reg_save_area or overflow_area
2361 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
2363 // increase overflow_area by 4/8 if gpr/fpr > 8
2364 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
2365 DAG.getConstant(VT.isInteger() ? 4 : 8,
2366 dl, MVT::i32));
2368 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
2369 OverflowAreaPlusN);
2371 InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
2372 OverflowAreaPtr,
2373 MachinePointerInfo(),
2374 MVT::i32, false, false, 0);
2376 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
2377 false, false, false, 0);
2378 }
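// Worked example (illustrative): va_arg(ap, double) on 32-bit SVR4 reads the
// fpr count; if fpr < 8 the value comes from reg_save_area + 32 + 8*fpr (the
// eight GPR slots occupy the first 32 bytes), otherwise it is taken from
// overflow_area, which is then bumped by 8.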
2380 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
2381 const PPCSubtarget &Subtarget) const {
2382 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
2384 // We have to copy the entire va_list struct:
2385 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
2386 return DAG.getMemcpy(Op.getOperand(0), Op,
2387 Op.getOperand(1), Op.getOperand(2),
2388 DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
2389 false, MachinePointerInfo(), MachinePointerInfo());
2390 }
2392 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
2393 SelectionDAG &DAG) const {
2394 return Op.getOperand(0);
2395 }
2397 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
2398 SelectionDAG &DAG) const {
2399 SDValue Chain = Op.getOperand(0);
2400 SDValue Trmp = Op.getOperand(1); // trampoline
2401 SDValue FPtr = Op.getOperand(2); // nested function
2402 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
2404 SDLoc dl(Op);
2405 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2406 bool isPPC64 = (PtrVT == MVT::i64);
2407 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
2409 TargetLowering::ArgListTy Args;
2410 TargetLowering::ArgListEntry Entry;
2412 Entry.Ty = IntPtrTy;
2413 Entry.Node = Trmp; Args.push_back(Entry);
2415 // TrampSize == (isPPC64 ? 48 : 40);
2416 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
2417 isPPC64 ? MVT::i64 : MVT::i32);
2418 Args.push_back(Entry);
2420 Entry.Node = FPtr; Args.push_back(Entry);
2421 Entry.Node = Nest; Args.push_back(Entry);
2423 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
2424 TargetLowering::CallLoweringInfo CLI(DAG);
2425 CLI.setDebugLoc(dl).setChain(Chain)
2426 .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
2427 DAG.getExternalSymbol("__trampoline_setup", PtrVT),
2428 std::move(Args), 0);
2430 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2431 return CallResult.second;
2432 }
2434 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
2435 const PPCSubtarget &Subtarget) const {
2436 MachineFunction &MF = DAG.getMachineFunction();
2437 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2439 SDLoc dl(Op);
2441 if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
2442 // vastart just stores the address of the VarArgsFrameIndex slot into the
2443 // memory location argument.
2444 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
2445 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2446 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2447 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
2448 MachinePointerInfo(SV),
2449 false, false, 0);
2450 }
2452 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
2453 // We suppose the given va_list is already allocated.
2455 // typedef struct {
2456 // char gpr; /* index into the array of 8 GPRs
2457 // * stored in the register save area
2458 // * gpr=0 corresponds to r3,
2459 // * gpr=1 to r4, etc.
2460 // */
2461 // char fpr; /* index into the array of 8 FPRs
2462 // * stored in the register save area
2463 // * fpr=0 corresponds to f1,
2464 // * fpr=1 to f2, etc.
2465 // */
2466 // char *overflow_arg_area;
2467 // /* location on stack that holds
2468 // * the next overflow argument
2469 // */
2470 // char *reg_save_area;
2471 // /* where r3:r10 and f1:f8 (if saved)
2472 // * are stored
2473 // */
2474 // } va_list[1];
2477 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
2478 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
2480 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
2482 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
2483 PtrVT);
2484 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
2485 PtrVT);
2487 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
2488 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
2490 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
2491 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
2493 uint64_t FPROffset = 1;
2494 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
2496 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2498 // Store first byte : number of int regs
2499 SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
2500 Op.getOperand(1),
2501 MachinePointerInfo(SV),
2502 MVT::i8, false, false, 0);
2503 uint64_t nextOffset = FPROffset;
2504 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
2505 ConstFPROffset);
2507 // Store second byte : number of float regs
2508 SDValue secondStore =
2509 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
2510 MachinePointerInfo(SV, nextOffset), MVT::i8,
2511 false, false, 0);
2512 nextOffset += StackOffset;
2513 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
2515 // Store second word : arguments given on stack
2516 SDValue thirdStore =
2517 DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
2518 MachinePointerInfo(SV, nextOffset),
2519 false, false, 0);
2520 nextOffset += FrameOffset;
2521 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
2523 // Store third word : arguments given in registers
2524 return DAG.getStore(thirdStore, dl, FR, nextPtr,
2525 MachinePointerInfo(SV, nextOffset),
2526 false, false, 0);
2527 }
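// Resulting layout (illustrative): for the 32-bit SVR4 va_list written above,
// byte 0 holds gpr, byte 1 holds fpr, bytes 4-7 hold overflow_arg_area and
// bytes 8-11 hold reg_save_area, matching the offsets 1, 3 and 4 successively
// added to nextPtr by the stores.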
2530 #include "PPCGenCallingConv.inc"
2532 // Function whose sole purpose is to kill compiler warnings
2533 // stemming from unused functions included from PPCGenCallingConv.inc.
2534 CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
2535 return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
2536 }
2538 bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
2539 CCValAssign::LocInfo &LocInfo,
2540 ISD::ArgFlagsTy &ArgFlags,
2541 CCState &State) {
2542 return true;
2543 }
2545 bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
2546 MVT &LocVT,
2547 CCValAssign::LocInfo &LocInfo,
2548 ISD::ArgFlagsTy &ArgFlags,
2549 CCState &State) {
2550 static const MCPhysReg ArgRegs[] = {
2551 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2552 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2553 };
2554 const unsigned NumArgRegs = array_lengthof(ArgRegs);
2556 unsigned RegNum = State.getFirstUnallocated(ArgRegs);
2558 // Skip one register if the first unallocated register has an even register
2559 // number and there are still argument registers available which have not been
2560 // allocated yet. RegNum is actually an index into ArgRegs, which means we
2561 // need to skip a register if RegNum is odd.
2562 if (RegNum != NumArgRegs && RegNum % 2 == 1) {
2563 State.AllocateReg(ArgRegs[RegNum]);
2564 }
2566 // Always return false here, as this function only makes sure that the first
2567 // unallocated register has an odd register number and does not actually
2568 // allocate a register for the current argument.
2569 return false;
2570 }
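// Illustrative case: if an i32 has already consumed r3, an incoming i64 would
// start at ArgRegs[1] (r4); the hook above allocates r4 as a hole so the i64
// is passed in the even-aligned pair r5:r6, as the 32-bit SVR4 ABI requires.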
2572 bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
2573 MVT &LocVT,
2574 CCValAssign::LocInfo &LocInfo,
2575 ISD::ArgFlagsTy &ArgFlags,
2576 CCState &State) {
2577 static const MCPhysReg ArgRegs[] = {
2578 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
2579 PPC::F8
2580 };
2582 const unsigned NumArgRegs = array_lengthof(ArgRegs);
2584 unsigned RegNum = State.getFirstUnallocated(ArgRegs);
2586 // If there is only one Floating-point register left we need to put both f64
2587 // values of a split ppc_fp128 value on the stack.
2588 if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
2589 State.AllocateReg(ArgRegs[RegNum]);
2590 }
2592 // Always return false here, as this function only makes sure that the two f64
2593 // values a ppc_fp128 value is split into are both passed in registers or both
2594 // passed on the stack and does not actually allocate a register for the
2595 // current argument.
2596 return false;
2597 }
2599 /// FPR - The set of FP registers that should be allocated for arguments,
2600 /// on Darwin.
2601 static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
2602 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
2603 PPC::F11, PPC::F12, PPC::F13};
2605 /// QFPR - The set of QPX registers that should be allocated for arguments.
2606 static const MCPhysReg QFPR[] = {
2607 PPC::QF1, PPC::QF2, PPC::QF3, PPC::QF4, PPC::QF5, PPC::QF6, PPC::QF7,
2608 PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
2610 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
2611 /// the stack.
2612 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
2613 unsigned PtrByteSize) {
2614 unsigned ArgSize = ArgVT.getStoreSize();
2615 if (Flags.isByVal())
2616 ArgSize = Flags.getByValSize();
2618 // Round up to multiples of the pointer size, except for array members,
2619 // which are always packed.
2620 if (!Flags.isInConsecutiveRegs())
2621 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2623 return ArgSize;
2624 }
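// Worked example (illustrative): a 10-byte by-value aggregate with an 8-byte
// pointer size rounds up to ((10 + 7) / 8) * 8 == 16 reserved bytes, while a
// packed array member (isInConsecutiveRegs) keeps its exact 10-byte size.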
2626 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
2627 /// on the stack.
2628 static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
2629 ISD::ArgFlagsTy Flags,
2630 unsigned PtrByteSize) {
2631 unsigned Align = PtrByteSize;
2633 // Altivec parameters are padded to a 16 byte boundary.
2634 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
2635 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
2636 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
2637 ArgVT == MVT::v1i128)
2638 Align = 16;
2639 // QPX vector types stored in double-precision are padded to a 32 byte
2640 // boundary.
2641 else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
2642 Align = 32;
2644 // ByVal parameters are aligned as requested.
2645 if (Flags.isByVal()) {
2646 unsigned BVAlign = Flags.getByValAlign();
2647 if (BVAlign > PtrByteSize) {
2648 if (BVAlign % PtrByteSize != 0)
2649 llvm_unreachable(
2650 "ByVal alignment is not a multiple of the pointer size");
2652 Align = BVAlign;
2653 }
2654 }
2656 // Array members are always packed to their original alignment.
2657 if (Flags.isInConsecutiveRegs()) {
2658 // If the array member was split into multiple registers, the first
2659 // needs to be aligned to the size of the full type. (Except for
2660 // ppcf128, which is only aligned as its f64 components.)
2661 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
2662 Align = OrigVT.getStoreSize();
2663 else
2664 Align = ArgVT.getStoreSize();
2665 }
2667 return Align;
2668 }
2670 /// CalculateStackSlotUsed - Return whether this argument will use its
2671 /// stack slot (instead of being passed in registers). ArgOffset,
2672 /// AvailableFPRs, and AvailableVRs must hold the current argument
2673 /// position, and will be updated to account for this argument.
2674 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
2675 ISD::ArgFlagsTy Flags,
2676 unsigned PtrByteSize,
2677 unsigned LinkageSize,
2678 unsigned ParamAreaSize,
2679 unsigned &ArgOffset,
2680 unsigned &AvailableFPRs,
2681 unsigned &AvailableVRs, bool HasQPX) {
2682 bool UseMemory = false;
2684 // Respect alignment of argument on the stack.
2685 unsigned Align =
2686 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
2687 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
2688 // If there's no space left in the argument save area, we must
2689 // use memory (this check also catches zero-sized arguments).
2690 if (ArgOffset >= LinkageSize + ParamAreaSize)
2691 UseMemory = true;
2693 // Allocate argument on the stack.
2694 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
2695 if (Flags.isInConsecutiveRegsLast())
2696 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
2697 // If we overran the argument save area, we must use memory
2698 // (this check catches arguments passed partially in memory)
2699 if (ArgOffset > LinkageSize + ParamAreaSize)
2700 UseMemory = true;
2702 // However, if the argument is actually passed in an FPR or a VR,
2703 // we don't use memory after all.
2704 if (!Flags.isByVal()) {
2705 if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
2706 // QPX registers overlap with the scalar FP registers.
2707 (HasQPX && (ArgVT == MVT::v4f32 ||
2708 ArgVT == MVT::v4f64 ||
2709 ArgVT == MVT::v4i1)))
2710 if (AvailableFPRs > 0) {
2711 --AvailableFPRs;
2712 return false;
2713 }
2714 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
2715 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
2716 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
2717 ArgVT == MVT::v1i128)
2718 if (AvailableVRs > 0) {
2719 --AvailableVRs;
2720 return false;
2721 }
2722 }
2724 return UseMemory;
2725 }
2727 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
2728 /// ensure minimum alignment required for target.
2729 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
2730 unsigned NumBytes) {
2731 unsigned TargetAlign = Lowering->getStackAlignment();
2732 unsigned AlignMask = TargetAlign - 1;
2733 NumBytes = (NumBytes + AlignMask) & ~AlignMask;
2734 return NumBytes;
2735 }
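// Worked example (illustrative): with a 16-byte target stack alignment,
// NumBytes == 100 becomes (100 + 15) & ~15 == 112.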
2737 SDValue
2738 PPCTargetLowering::LowerFormalArguments(SDValue Chain,
2739 CallingConv::ID CallConv, bool isVarArg,
2740 const SmallVectorImpl<ISD::InputArg>
2741 &Ins,
2742 SDLoc dl, SelectionDAG &DAG,
2743 SmallVectorImpl<SDValue> &InVals)
2744 const {
2745 if (Subtarget.isSVR4ABI()) {
2746 if (Subtarget.isPPC64())
2747 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
2748 dl, DAG, InVals);
2749 else
2750 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
2751 dl, DAG, InVals);
2752 } else
2753 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
2754 dl, DAG, InVals);
2755 }
2758 SDValue
2759 PPCTargetLowering::LowerFormalArguments_32SVR4(
2760 SDValue Chain,
2761 CallingConv::ID CallConv, bool isVarArg,
2762 const SmallVectorImpl<ISD::InputArg>
2763 &Ins,
2764 SDLoc dl, SelectionDAG &DAG,
2765 SmallVectorImpl<SDValue> &InVals) const {
2767 // 32-bit SVR4 ABI Stack Frame Layout:
2768 // +-----------------------------------+
2769 // +--> | Back chain |
2770 // | +-----------------------------------+
2771 // | | Floating-point register save area |
2772 // | +-----------------------------------+
2773 // | | General register save area |
2774 // | +-----------------------------------+
2775 // | | CR save word |
2776 // | +-----------------------------------+
2777 // | | VRSAVE save word |
2778 // | +-----------------------------------+
2779 // | | Alignment padding |
2780 // | +-----------------------------------+
2781 // | | Vector register save area |
2782 // | +-----------------------------------+
2783 // | | Local variable space |
2784 // | +-----------------------------------+
2785 // | | Parameter list area |
2786 // | +-----------------------------------+
2787 // | | LR save word |
2788 // | +-----------------------------------+
2789 // SP--> +--- | Back chain |
2790 // +-----------------------------------+
2792 // Specifications:
2793 // System V Application Binary Interface PowerPC Processor Supplement
2794 // AltiVec Technology Programming Interface Manual
2796 MachineFunction &MF = DAG.getMachineFunction();
2797 MachineFrameInfo *MFI = MF.getFrameInfo();
2798 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2800 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
2801 // Potential tail calls could cause overwriting of argument stack slots.
2802 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
2803 (CallConv == CallingConv::Fast));
2804 unsigned PtrByteSize = 4;
2806 // Assign locations to all of the incoming arguments.
2807 SmallVector<CCValAssign, 16> ArgLocs;
2808 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2809 *DAG.getContext());
2811 // Reserve space for the linkage area on the stack.
2812 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
2813 CCInfo.AllocateStack(LinkageSize, PtrByteSize);
2815 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
2817 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2818 CCValAssign &VA = ArgLocs[i];
2820 // Arguments stored in registers.
2821 if (VA.isRegLoc()) {
2822 const TargetRegisterClass *RC;
2823 EVT ValVT = VA.getValVT();
2825 switch (ValVT.getSimpleVT().SimpleTy) {
2826 default:
2827 llvm_unreachable("ValVT not supported by formal arguments Lowering");
2828 case MVT::i1:
2829 case MVT::i32:
2830 RC = &PPC::GPRCRegClass;
2831 break;
2832 case MVT::f32:
2833 if (Subtarget.hasP8Vector())
2834 RC = &PPC::VSSRCRegClass;
2835 else
2836 RC = &PPC::F4RCRegClass;
2837 break;
2838 case MVT::f64:
2839 if (Subtarget.hasVSX())
2840 RC = &PPC::VSFRCRegClass;
2841 else
2842 RC = &PPC::F8RCRegClass;
2843 break;
2844 case MVT::v16i8:
2845 case MVT::v8i16:
2846 case MVT::v4i32:
2847 RC = &PPC::VRRCRegClass;
2848 break;
2849 case MVT::v4f32:
2850 RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
2851 break;
2852 case MVT::v2f64:
2853 case MVT::v2i64:
2854 RC = &PPC::VSHRCRegClass;
2855 break;
2856 case MVT::v4f64:
2857 RC = &PPC::QFRCRegClass;
2858 break;
2859 case MVT::v4i1:
2860 RC = &PPC::QBRCRegClass;
2861 break;
2862 }
2864 // Transform the arguments stored in physical registers into virtual ones.
2865 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2866 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
2867 ValVT == MVT::i1 ? MVT::i32 : ValVT);
2869 if (ValVT == MVT::i1)
2870 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
2872 InVals.push_back(ArgValue);
2873 } else {
2874 // Argument stored in memory.
2875 assert(VA.isMemLoc());
2877 unsigned ArgSize = VA.getLocVT().getStoreSize();
2878 int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
2879 isImmutable);
2881 // Create load nodes to retrieve arguments from the stack.
2882 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2883 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
2884 MachinePointerInfo(),
2885 false, false, false, 0));
2886 }
2887 }
2889 // Assign locations to all of the incoming aggregate by value arguments.
2890 // Aggregates passed by value are stored in the local variable space of the
2891 // caller's stack frame, right above the parameter list area.
2892 SmallVector<CCValAssign, 16> ByValArgLocs;
2893 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2894 ByValArgLocs, *DAG.getContext());
2896 // Reserve stack space for the allocations in CCInfo.
2897 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
2899 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
2901 // Area that is at least reserved in the caller of this function.
2902 unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
2903 MinReservedArea = std::max(MinReservedArea, LinkageSize);
2905 // Set the size that is at least reserved in caller of this function. Tail
2906 // call optimized function's reserved stack space needs to be aligned so that
2907 // taking the difference between two stack areas will result in an aligned
2908 // stack size.
2909 MinReservedArea =
2910 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
2911 FuncInfo->setMinReservedArea(MinReservedArea);
2913 SmallVector<SDValue, 8> MemOps;
2915 // If the function takes variable number of arguments, make a frame index for
2916 // the start of the first vararg value... for expansion of llvm.va_start.
2917 if (isVarArg) {
2918 static const MCPhysReg GPArgRegs[] = {
2919 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
2920 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
2921 };
2922 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
2924 static const MCPhysReg FPArgRegs[] = {
2925 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
2926 PPC::F8
2927 };
2928 unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
2929 if (DisablePPCFloatInVariadic)
2930 NumFPArgRegs = 0;
2932 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
2933 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
2935 // Make room for NumGPArgRegs and NumFPArgRegs.
2936 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
2937 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
2939 FuncInfo->setVarArgsStackOffset(
2940 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
2941 CCInfo.getNextStackOffset(), true));
2943 FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
2944 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
2946 // The fixed integer arguments of a variadic function are stored to the
2947 // VarArgsFrameIndex on the stack so that they may be loaded by dereferencing
2948 // the result of va_next.
2949 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
2950 // Get an existing live-in vreg, or add a new one.
2951 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
2952 if (!VReg)
2953 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
2955 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
2956 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2957 MachinePointerInfo(), false, false, 0);
2958 MemOps.push_back(Store);
2959 // Increment the address by four for the next argument to store
2960 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
2961 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
2962 }
2964 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
2965 // is set.
2966 // The double arguments are stored to the VarArgsFrameIndex
2967 // on the stack.
2968 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
2969 // Get an existing live-in vreg, or add a new one.
2970 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
2971 if (!VReg)
2972 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
2974 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
2975 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
2976 MachinePointerInfo(), false, false, 0);
2977 MemOps.push_back(Store);
2978 // Increment the address by eight for the next argument to store
2979 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
2980 PtrVT);
2981 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
2982 }
2983 }
2985 if (!MemOps.empty())
2986 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
2988 return Chain;
2989 }
2991 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
2992 // value to MVT::i64 and then truncate to the correct register size.
2993 SDValue
2994 PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
2995 SelectionDAG &DAG, SDValue ArgVal,
2996 SDLoc dl) const {
2997 if (Flags.isSExt())
2998 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
2999 DAG.getValueType(ObjectVT));
3000 else if (Flags.isZExt())
3001 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
3002 DAG.getValueType(ObjectVT));
3004 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
3005 }
3007 SDValue
3008 PPCTargetLowering::LowerFormalArguments_64SVR4(
3009 SDValue Chain,
3010 CallingConv::ID CallConv, bool isVarArg,
3011 const SmallVectorImpl<ISD::InputArg>
3012 &Ins,
3013 SDLoc dl, SelectionDAG &DAG,
3014 SmallVectorImpl<SDValue> &InVals) const {
3015 // TODO: add description of PPC stack frame format, or at least some docs.
3017 bool isELFv2ABI = Subtarget.isELFv2ABI();
3018 bool isLittleEndian = Subtarget.isLittleEndian();
3019 MachineFunction &MF = DAG.getMachineFunction();
3020 MachineFrameInfo *MFI = MF.getFrameInfo();
3021 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3023 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
3024 "fastcc not supported on varargs functions");
3026 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
3027 // Potential tail calls could cause overwriting of argument stack slots.
3028 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3029 (CallConv == CallingConv::Fast));
3030 unsigned PtrByteSize = 8;
3031 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3033 static const MCPhysReg GPR[] = {
3034 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
3035 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
3036 };
3037 static const MCPhysReg VR[] = {
3038 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
3039 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
3041 static const MCPhysReg VSRH[] = {
3042 PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
3043 PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
3046 const unsigned Num_GPR_Regs = array_lengthof(GPR);
3047 const unsigned Num_FPR_Regs = 13;
3048 const unsigned Num_VR_Regs = array_lengthof(VR);
3049 const unsigned Num_QFPR_Regs = Num_FPR_Regs;
3051 // Do a first pass over the arguments to determine whether the ABI
3052 // guarantees that our caller has allocated the parameter save area
3053 // on its stack frame. In the ELFv1 ABI, this is always the case;
3054 // in the ELFv2 ABI, it is true if this is a vararg function or if
3055 // any parameter is located in a stack slot.
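  // Illustrative example (not exhaustive): under ELFv2, a function such as
  // "long f(long a, long b)" receives both arguments in GPRs and its caller
  // need not allocate a parameter save area, whereas a vararg function, or
  // one taking more arguments than fit in registers, always gets one.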
  bool HasParameterArea = !isELFv2ABI || isVarArg;
  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = Num_FPR_Regs;
  unsigned AvailableVRs = Num_VR_Regs;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    if (Ins[i].Flags.isNest())
      continue;

    if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs,
                               Subtarget.hasQPX()))
      HasParameterArea = true;
  }
  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  unsigned &QFPR_idx = FPR_idx;
  SmallVector<SDValue, 8> MemOps;
  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    EVT OrigVT = Ins[ArgNo].ArgVT;
    unsigned ObjSize = ObjectVT.getStoreSize();
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    // We re-align the argument offset for each argument, except when using
    // the fast calling convention, when we need to make sure we do that only
    // when we'll actually use a stack slot.
    unsigned CurArgOffset, Align;
    auto ComputeArgOffset = [&]() {
      /* Respect alignment of argument on the stack.  */
      Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
      CurArgOffset = ArgOffset;
    };

    if (CallConv != CallingConv::Fast) {
      ComputeArgOffset();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
    }

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      if (CallConv == CallingConv::Fast)
        ComputeArgOffset();

      // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple
      // of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
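      // For example, a 12-byte byval aggregate has ObjSize == 12 and
      // ArgSize == 16: it occupies two full doublewords of the parameter
      // save area.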
      // Empty aggregate parameters do not take up registers.  Examples:
      //   struct { } a;
      //   union  { } b;
      //   int c[0];
      // etc.  However, we have to provide a place-holder in InVals, so
      // pretend we have an 8-byte item at the current address for that
      // purpose.
      if (!ObjSize) {
        int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
        InVals.push_back(FIN);
        continue;
      }

      // Create a stack object covering all stack doublewords occupied
      // by the argument.  If the argument is (fully or partially) on
      // the stack, or if the argument is fully in registers but the
      // caller has allocated the parameter save area anyway, we can refer
      // directly to the caller's stack frame.  Otherwise, create a
      // local copy in our own frame.
      int FI;
      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
        FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false, true);
      else
        FI = MFI->CreateStackObject(ArgSize, Align, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

      // Handle aggregates smaller than 8 bytes.
      if (ObjSize < PtrByteSize) {
        // The value of the object is its address, which differs from the
        // address of the enclosing doubleword on big-endian systems.
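        // For example, a 3-byte aggregate is stored in the low-order bytes
        // of its doubleword, so its address is the doubleword address + 5 on
        // big-endian systems but the doubleword address + 0 on little-endian
        // systems.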
        SDValue Arg = FIN;
        if (!isLittleEndian) {
          SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
          Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
        }
        InVals.push_back(Arg);

        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store;

          if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
            EVT ObjType = (ObjSize == 1 ? MVT::i8 :
                           (ObjSize == 2 ? MVT::i16 : MVT::i32));
            Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
                                      MachinePointerInfo(FuncArg),
                                      ObjType, false, false, 0);
          } else {
            // For sizes that don't fit a truncating store (3, 5, 6, 7),
            // store the whole register as-is to the parameter save area
            // slot.
            Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                 MachinePointerInfo(FuncArg),
                                 false, false, 0);
          }

          MemOps.push_back(Store);
        }
        // Whether we copied from a register or not, advance the offset
        // into the parameter save area by a full doubleword.
        ArgOffset += PtrByteSize;
        continue;
      }

      // The value of the object is its address, which is the address of
      // its first stack doubleword.
      InVals.push_back(FIN);

      // Store whatever pieces of the object are in registers to memory.
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        if (GPR_idx == Num_GPR_Regs)
          break;

        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
        SDValue Addr = FIN;
        if (j) {
          SDValue Off = DAG.getConstant(j, dl, PtrVT);
          Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
        }
        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
                                     MachinePointerInfo(FuncArg, j),
                                     false, false, 0);
        MemOps.push_back(Store);
        ++GPR_idx;
      }
      ArgOffset += ArgSize;
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // float aggregates.
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx],
                              Subtarget.hasP8Vector()
                                  ? &PPC::VSSRCRegClass
                                  : &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
                                                ? &PPC::VSFRCRegClass
                                                : &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // This can only ever happen in the presence of f32 array types,
        // since otherwise we never run out of FPRs before running out
        // of GPRs.
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::f32) {
          if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
            ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));
          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
        }

        ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
      }

      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
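      // For example, four consecutive f32 array elements occupy 16 bytes
      // (two doublewords) of the argument area, whereas four scalar float
      // arguments would occupy one doubleword each, 32 bytes in total.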
      if (CallConv != CallingConv::Fast || needsLoad) {
        ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
        ArgOffset += ArgSize;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
      if (!Subtarget.hasQPX()) {
        // These can be scalar arguments or elements of a vector array type
        // passed directly.  The latter are used to implement ELFv2 homogenous
        // vector aggregates.
        if (VR_idx != Num_VR_Regs) {
          unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
                          MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
                          MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
          ++VR_idx;
        } else {
          if (CallConv == CallingConv::Fast)
            ComputeArgOffset();

          needsLoad = true;
        }
        if (CallConv != CallingConv::Fast || needsLoad)
          ArgOffset += 16;
        break;
      } // not QPX

      assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
             "Invalid QPX parameter type");
      /* fall through */

    case MVT::v4f64:
    case MVT::v4i1:
      // QPX vectors are treated like their scalar floating-point subregisters
      // (except that they're larger).
      unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
      if (QFPR_idx != Num_QFPR_Regs) {
        const TargetRegisterClass *RC;
        switch (ObjectVT.getSimpleVT().SimpleTy) {
        case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
        case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
        default:         RC = &PPC::QBRCRegClass; break;
        }

        unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++QFPR_idx;
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();
        needsLoad = true;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += Sz;
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      if (ObjSize < ArgSize && !isLittleEndian)
        CurArgOffset += ArgSize - ObjSize;
      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
                           false, false, false, 0);
    }

    InVals.push_back(ArgVal);
  }

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea;
  if (HasParameterArea)
    MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
  else
    MinReservedArea = LinkageSize;

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so
  // that taking the difference between two stack areas will result in an
  // aligned stack size.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
        MFI->CreateFixedObject(PtrByteSize, Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                   MachinePointerInfo(), false, false, 0);
      MemOps.push_back(Store);
      // Increment the address by PtrByteSize for the next argument to store.
      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
SDValue
PPCTargetLowering::LowerFormalArguments_Darwin(
                                      SDValue Chain,
                                      CallingConv::ID CallConv, bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg>
                                        &Ins,
                                      SDLoc dl, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = isPPC64 ? 8 : 4;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned ArgOffset = LinkageSize;
  // Area that is at least reserved in caller of this function.
  unsigned MinReservedArea = ArgOffset;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
  const unsigned Num_FPR_Regs = 13;
  const unsigned Num_VR_Regs  = array_lengthof(VR);

  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  // In 32-bit non-varargs functions, the stack space for vectors is after the
  // stack space for non-vectors.  We do not use this space unless we have
  // too many vectors to fit in registers, something that only occurs in
  // constructed examples:), but we have to walk the arglist to figure
  // that out...for the pathological case, compute VecArgOffset as the
  // start of the vector parameter area.  Computing VecArgOffset is the
  // entire point of the following loop.
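  // Illustrative example (hypothetical signature): for
  //   void f(int a, double b, <4 x float> c)
  // compiled with !isVarArg && !isPPC64, the loop below advances
  // VecArgOffset by 4 bytes for 'a' and 8 bytes for 'b' and skips the
  // vector 'c'; after the rounding and 12-parameter skip that follow, the
  // result is where c's memory slot would begin if it spilled to the stack.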
  unsigned VecArgOffset = ArgOffset;
  if (!isVarArg && !isPPC64) {
    for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
         ++ArgNo) {
      EVT ObjectVT = Ins[ArgNo].VT;
      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;

      if (Flags.isByVal()) {
        // ObjSize is the true size; ArgSize is ObjSize rounded up to a
        // multiple of registers.
        unsigned ObjSize = Flags.getByValSize();
        unsigned ArgSize =
          ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        VecArgOffset += ArgSize;
        continue;
      }

      switch(ObjectVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unhandled argument type!");
      case MVT::i1:
      case MVT::i32:
      case MVT::f32:
        VecArgOffset += 4;
        break;
      case MVT::i64:  // PPC64
      case MVT::f64:
        // FIXME: We are guaranteed to be !isPPC64 at this point.
        // Does MVT::i64 apply?
        VecArgOffset += 8;
        break;
      case MVT::v4f32:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        // Nothing to do, we're only looking at Nonvector args here.
        break;
      }
    }
  }
  // We've found where the vector parameter area in memory is.  Skip the
  // first 12 parameters; these don't use that memory.
  VecArgOffset = ((VecArgOffset+15)/16)*16;
  VecArgOffset += 12*16;
  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  SmallVector<SDValue, 8> MemOps;
  unsigned nAltivecParamsAtEnd = 0;
  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    unsigned CurArgOffset = ArgOffset;

    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
      if (isVarArg || isPPC64) {
        MinReservedArea = ((MinReservedArea+15)/16)*16;
        MinReservedArea += CalculateStackSlotSize(ObjectVT,
                                                  Flags,
                                                  PtrByteSize);
      } else  nAltivecParamsAtEnd++;
    } else
      // Calculate min reserved area.
      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
                                                Flags,
                                                PtrByteSize);

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple
      // of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Objects of size 1 and 2 are right justified, everything else is
      // left justified.  This means the memory address is adjusted forwards.
      if (ObjSize==1 || ObjSize==2) {
        CurArgOffset = CurArgOffset + (4 - ObjSize);
      }
      // The value of the object is its address.
      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);
      if (ObjSize==1 || ObjSize==2) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
          SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                            MachinePointerInfo(FuncArg),
                                            ObjType, false, false, 0);
          MemOps.push_back(Store);
          ++GPR_idx;
        }

        ArgOffset += PtrByteSize;

        continue;
      }
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        // Store whatever pieces of the object are in registers
        // to memory.  ArgOffset will be the address of the beginning
        // of the object.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                       MachinePointerInfo(FuncArg, j),
                                       false, false, 0);
          MemOps.push_back(Store);
          ++GPR_idx;
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
          break;
        }
      }
      continue;
    }
    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
      if (!isPPC64) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);

          if (ObjectVT == MVT::i1)
            ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);

          ++GPR_idx;
        } else {
          needsLoad = true;
          ArgSize = PtrByteSize;
        }
        // All int arguments reserve stack space in the Darwin ABI.
        ArgOffset += PtrByteSize;
        break;
      }
      // FALLTHROUGH
    case MVT::i64:  // PPC64
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        ++GPR_idx;
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      // All int arguments reserve stack space in the Darwin ABI.
      ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      if (GPR_idx != Num_GPR_Regs) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }

      // All FP arguments reserve stack space in the Darwin ABI.
      ArgOffset += isPPC64 ? 8 : ObjSize;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        if (isVarArg) {
          while ((ArgOffset % 16) != 0) {
            ArgOffset += PtrByteSize;
            if (GPR_idx != Num_GPR_Regs)
              GPR_idx++;
          }
          ArgOffset += 16;
          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
        }
        ++VR_idx;
      } else {
        if (!isVarArg && !isPPC64) {
          // Vectors go after all the nonvectors.
          CurArgOffset = VecArgOffset;
          VecArgOffset += 16;
        } else {
          // Vectors are aligned.
          ArgOffset = ((ArgOffset+15)/16)*16;
          CurArgOffset = ArgOffset;
          ArgOffset += 16;
        }
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      int FI = MFI->CreateFixedObject(ObjSize,
                                      CurArgOffset + (ArgSize - ObjSize),
                                      isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
                           false, false, false, 0);
    }

    InVals.push_back(ArgVal);
  }
  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }

  // Area that is at least reserved in the caller of this function.
  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so
  // that taking the difference between two stack areas will result in an
  // aligned stack size.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
        MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
                               Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;

      if (isPPC64)
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      else
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                   MachinePointerInfo(), false, false, 0);
      MemOps.push_back(Store);
      // Increment the address by the pointer size for the next argument.
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
                                   unsigned ParamSize) {

  if (!isTailCall) return 0;

  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
  unsigned CallerMinReservedArea = FI->getMinReservedArea();
  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
  // Remember only if the new adjustment is bigger.
  if (SPDiff < FI->getTailCallSPDelta())
    FI->setTailCallSPDelta(SPDiff);

  return SPDiff;
}
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                                     CallingConv::ID CalleeCC,
                                                     bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                                     SelectionDAG& DAG) const {
  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
    return false;

  // Variable argument functions are not supported.
  if (isVarArg)
    return false;

  MachineFunction &MF = DAG.getMachineFunction();
  CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
    // Functions containing by val parameters are not supported.
    for (unsigned i = 0; i != Ins.size(); i++) {
      ISD::ArgFlagsTy Flags = Ins[i].Flags;
      if (Flags.isByVal()) return false;
    }

    // Non-PIC/GOT tail calls are supported.
    if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
      return true;

    // At the moment we can only do local tail calls (in same module, hidden
    // or protected) if we are generating PIC.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
      return G->getGlobal()->hasHiddenVisibility()
          || G->getGlobal()->hasProtectedVisibility();
  }

  return false;
}
/// isBLACompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
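/// For example, Op == 0x2000 passes both checks below (the low two bits are
/// clear and the value survives SignExtend32<26>), so it is encoded as the
/// immediate 0x800 == 0x2000 >> 2, while Op == 0x2001 is rejected.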
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return nullptr;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      SignExtend32<26>(Addr) != Addr)
    return nullptr;  // Top 6 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, SDLoc(Op),
                         DAG.getTargetLoweringInfo().getPointerTy(
                             DAG.getDataLayout())).getNode();
}

namespace {
struct TailCallArgumentInfo {
  SDValue Arg;
  SDValue FrameIdxOp;
  int FrameIdx;

  TailCallArgumentInfo() : FrameIdx(0) {}
};

} // end anonymous namespace
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static void
StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
                                  SDValue Chain,
                   const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
                   SmallVectorImpl<SDValue> &MemOpChains,
                   SDLoc dl) {
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
    SDValue Arg = TailCallArgs[i].Arg;
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
    int FI = TailCallArgs[i].FrameIdx;
    // Store relative to framepointer.
    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
                                       MachinePointerInfo::getFixedStack(FI),
                                       false, false, 0));
  }
}
/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
/// the appropriate stack slot for the tail call optimized function call.
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
                                             MachineFunction &MF,
                                             SDValue Chain,
                                             SDValue OldRetAddr,
                                             SDValue OldFP,
                                             int SPDiff,
                                             bool isPPC64,
                                             bool isDarwinABI,
                                             SDLoc dl) {
  if (SPDiff) {
    // Calculate the new stack slot for the return address.
    int SlotSize = isPPC64 ? 8 : 4;
    const PPCFrameLowering *FL =
        MF.getSubtarget<PPCSubtarget>().getFrameLowering();
    int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
    int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
                                                          NewRetAddrLoc, true);
    EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
    SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
                         MachinePointerInfo::getFixedStack(NewRetAddr),
                         false, false, 0);

    // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
    // slot as the FP is never overwritten.
    if (isDarwinABI) {
      int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
      int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
                                                          true);
      SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
      Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
                           MachinePointerInfo::getFixedStack(NewFPIdx),
                           false, false, 0);
    }
  }
  return Chain;
}
/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
/// the position of the argument.
static void
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
                         SDValue Arg, int SPDiff, unsigned ArgOffset,
                     SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
  int Offset = ArgOffset + SPDiff;
  uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
  int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
  SDValue FIN = DAG.getFrameIndex(FI, VT);
  TailCallArgumentInfo Info;
  Info.Arg = Arg;
  Info.FrameIdxOp = FIN;
  Info.FrameIdx = FI;
  TailCallArguments.push_back(Info);
}
/// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
/// address stack slots. Returns the chain as result and the loaded frame
/// pointers in LROpOut/FPOpOut. Used when tail calling.
SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
                                                        int SPDiff,
                                                        SDValue Chain,
                                                        SDValue &LROpOut,
                                                        SDValue &FPOpOut,
                                                        bool isDarwinABI,
                                                        SDLoc dl) const {
  if (SPDiff) {
    // Load the LR and FP stack slot for later adjusting.
    EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
    LROpOut = getReturnAddrFrameIndex(DAG);
    LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
                          false, false, false, 0);
    Chain = SDValue(LROpOut.getNode(), 1);

    // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
    // slot as the FP is never overwritten.
    if (isDarwinABI) {
      FPOpOut = getFramePointerFrameIndex(DAG);
      FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
                            false, false, false, 0);
      Chain = SDValue(FPOpOut.getNode(), 1);
    }
  }
  return Chain;
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size".  Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
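/// For example (illustrative only; the callers below decide how Src and
/// Flags are set up), if the first doubleword of a byval aggregate was
/// passed in a GPR, Src may point at the tail that spilled to the stack and
/// Flags.getByValSize() then covers just that tail.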
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                          SDLoc dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       false, false, false, MachinePointerInfo(),
                       MachinePointerInfo());
}
/// LowerMemOpCallTo - Store the argument to the stack or remember it in case
/// of tail calls.
static void
LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
                 SDValue Arg, SDValue PtrOff, int SPDiff,
                 unsigned ArgOffset, bool isPPC64, bool isTailCall,
                 bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
                 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
                 SDLoc dl) {
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
  if (!isTailCall) {
    if (isVector) {
      SDValue StackPtr;
      if (isPPC64)
        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
      else
        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                           DAG.getConstant(ArgOffset, dl, PtrVT));
    }
    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
                                       MachinePointerInfo(), false, false, 0));
  // Calculate and remember argument location.
  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
                                  TailCallArguments);
}
static void
PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
                SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
                SDValue LROp, SDValue FPOp, bool isDarwinABI,
                SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  MachineFunction &MF = DAG.getMachineFunction();

  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  SmallVector<SDValue, 8> MemOpChains2;
  // Do not flag preceding copytoreg stuff together with the following stuff.
  InFlag = SDValue();
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                    MemOpChains2, dl);
  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);

  // Store the return address to the appropriate stack slot.
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
                                        isPPC64, isDarwinABI, dl);

  // Emit callseq_end just before tailcall node.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
  InFlag = Chain.getValue(1);
}
// Is this global address that of a function that can be called by name? (as
// opposed to something that must hold a descriptor for an indirect call).
static bool isFunctionGlobalAddress(SDValue Callee) {
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
        Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
      return false;

    return G->getGlobal()->getType()->getElementType()->isFunctionTy();
  }

  return false;
}
static unsigned
PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
            SDValue &Chain, SDValue CallSeqStart, SDLoc dl, int SPDiff,
            bool isTailCall, bool IsPatchPoint, bool hasNest,
            SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
            SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
            ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {

  bool isPPC64 = Subtarget.isPPC64();
  bool isSVR4ABI = Subtarget.isSVR4ABI();
  bool isELFv2ABI = Subtarget.isELFv2ABI();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.

  unsigned CallOpc = PPCISD::CALL;

  bool needIndirectCall = true;
  if (!isSVR4ABI || !isPPC64)
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
      // If this is an absolute destination address, use the munged value.
      Callee = SDValue(Dest, 0);
      needIndirectCall = false;
    }

  if (isFunctionGlobalAddress(Callee)) {
    GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
    // A call to a TLS address is actually an indirect call to a
    // thread-specific pointer.
    unsigned OpFlags = 0;
    if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
         (Subtarget.getTargetTriple().isMacOSX() &&
          Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
         !G->getGlobal()->isStrongDefinitionForLinker()) ||
        (Subtarget.isTargetELF() && !isPPC64 &&
         !G->getGlobal()->hasLocalLinkage() &&
         DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
      // PC-relative references to external symbols should go through $stub,
      // unless we're building with the leopard linker or later, which
      // automatically synthesizes these stubs.
      OpFlags = PPCII::MO_PLT_OR_STUB;
    }

    // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
    // every direct call is) turn it into a TargetGlobalAddress /
    // TargetExternalSymbol node so that legalize doesn't hack it.
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
                                        Callee.getValueType(), 0, OpFlags);
    needIndirectCall = false;
  }

  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned char OpFlags = 0;

    if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
         (Subtarget.getTargetTriple().isMacOSX() &&
          Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) ||
        (Subtarget.isTargetELF() && !isPPC64 &&
         DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
      // PC-relative references to external symbols should go through $stub,
      // unless we're building with the leopard linker or later, which
      // automatically synthesizes these stubs.
      OpFlags = PPCII::MO_PLT_OR_STUB;
    }

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
                                         OpFlags);
    needIndirectCall = false;
  }

  if (IsPatchPoint) {
    // We'll form an invalid direct call when lowering a patchpoint; the full
    // sequence for an indirect call is complicated, and many of the
    // instructions introduced might have side effects (and, thus, can't be
    // removed later). The call itself will be removed as soon as the
    // argument/return lowering is complete, so the fact that it has the wrong
    // kind of operands should not really matter.
    needIndirectCall = false;
  }
  if (needIndirectCall) {
    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
    // to do the call; we can't use PPCISD::CALL.
    SDValue MTCTROps[] = {Chain, Callee, InFlag};

    if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
      // Function pointers in the 64-bit SVR4 ABI do not point to the function
      // entry point, but to the function descriptor (the function entry point
      // address is part of the function descriptor though).
      // The function descriptor is a three doubleword structure with the
      // following fields: function entry point, TOC base address and
      // environment pointer.
      // Thus for a call through a function pointer, the following actions need
      // to be performed:
      //   1. Save the TOC of the caller in the TOC save area of its stack
      //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
      //   2. Load the address of the function entry point from the function
      //      descriptor.
      //   3. Load the TOC of the callee from the function descriptor into r2.
      //   4. Load the environment pointer from the function descriptor into
      //      r11.
      //   5. Branch to the function entry point address.
      //   6. On return of the callee, the TOC of the caller needs to be
      //      restored (this is done in FinishCall()).
      //
      // The loads are scheduled at the beginning of the call sequence, and the
      // register copies are flagged together to ensure that no other
      // operations can be scheduled in between.  E.g. without flagging the
      // copies together, a TOC access in the caller could be scheduled between
      // the assignment of the callee TOC and the branch to the callee, which
      // results in the TOC access going through the TOC of the callee instead
      // of going through the TOC of the caller, which leads to incorrect code.
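      //
      // Schematically, the nodes built below correspond to a sequence like
      // the following (illustrative only; the actual instructions are
      // emitted later from the MTCTR/BCTRL nodes):
      //   ld  rE,  0(fnptr)   ; function entry point
      //   ld  r2,  8(fnptr)   ; callee's TOC base
      //   ld  r11, 16(fnptr)  ; environment pointer
      //   mtctr rE
      //   bctrl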
      // Load the address of the function entry point from the function
      // descriptor.
      SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
      if (LDChain.getValueType() == MVT::Glue)
        LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);

      bool LoadsInv = Subtarget.hasInvariantFunctionDescriptors();

      MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
      SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
                                        false, false, LoadsInv, 8);

      // Load environment pointer into r11.
      SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
      SDValue LoadEnvPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddPtr,
                                       MPI.getWithOffset(16), false, false,
                                       LoadsInv, 8);

      SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
      SDValue TOCPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddTOC,
                                   MPI.getWithOffset(8), false, false,
                                   LoadsInv, 8);

      setUsesTOCBasePtr(DAG);
      SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
                                        InFlag);
      Chain = TOCVal.getValue(0);
      InFlag = TOCVal.getValue(1);

      // If the function call has an explicit 'nest' parameter, it takes the
      // place of the environment pointer.
      if (!hasNest) {
        SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
                                          InFlag);
        Chain = EnvVal.getValue(0);
        InFlag = EnvVal.getValue(1);
      }

      MTCTROps[0] = Chain;
      MTCTROps[1] = LoadFuncPtr;
      MTCTROps[2] = InFlag;
    }

    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
                        makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
    InFlag = Chain.getValue(1);

    NodeTys.clear();
    NodeTys.push_back(MVT::Other);
    NodeTys.push_back(MVT::Glue);
    Ops.push_back(Chain);
    CallOpc = PPCISD::BCTRL;
    Callee.setNode(nullptr);
    // Add use of X11 (holding environment pointer)
    if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
      Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
    // Add CTR register as callee so a bctr can be emitted later.
    if (isTailCall)
      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
  }

  // If this is a direct call, pass the chain and the callee.
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }
  // If this is a tail call add stack pointer delta.
  if (isTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
  // into the call.
  if (isSVR4ABI && isPPC64 && !IsPatchPoint) {
    setUsesTOCBasePtr(DAG);
    Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
  }

  return CallOpc;
}
static
bool isLocalCall(const SDValue &Callee)
{
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    return G->getGlobal()->isStrongDefinitionForLinker();
  return false;
}
SDValue
PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   SDLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const {

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                    *DAG.getContext());
  CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Val = DAG.getCopyFromReg(Chain, dl,
                                     VA.getLocReg(), VA.getLocVT(), InFlag);
    Chain = Val.getValue(1);
    InFlag = Val.getValue(2);

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::SExt:
      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}
SDValue
PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
                              bool isTailCall, bool isVarArg, bool IsPatchPoint,
                              bool hasNest, SelectionDAG &DAG,
                              SmallVector<std::pair<unsigned, SDValue>, 8>
                                &RegsToPass,
                              SDValue InFlag, SDValue Chain,
                              SDValue CallSeqStart, SDValue &Callee,
                              int SPDiff, unsigned NumBytes,
                              const SmallVectorImpl<ISD::InputArg> &Ins,
                              SmallVectorImpl<SDValue> &InVals,
                              ImmutableCallSite *CS) const {

  std::vector<EVT> NodeTys;
  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
                                 SPDiff, isTailCall, IsPatchPoint, hasNest,
                                 RegsToPass, Ops, NodeTys, CS, Subtarget);

  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
  if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));

  // When performing tail call optimization the callee pops its arguments off
  // the stack. Account for this here so these bytes can be pushed back on in
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
  int BytesCalleePops =
    (CallConv == CallingConv::Fast &&
     getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  // Emit tail call.
  if (isTailCall) {
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee)) &&
           "Expecting a global address, external symbol, absolute value or "
           "register");

    DAG.getMachineFunction().getFrameInfo()->setHasTailCall();
    return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
  }
  // Add a NOP immediately after the branch instruction when using the 64-bit
  // SVR4 ABI. At link time, if caller and callee are in a different module and
  // thus have a different TOC, the call will be replaced with a call to a stub
  // function which saves the current TOC, loads the TOC of the callee and
  // branches to the callee. The NOP will be replaced with a load instruction
  // which restores the TOC of the caller from the TOC save slot of the current
  // stack frame. If caller and callee belong to the same module (and have the
  // same TOC), the NOP will remain unchanged.
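  //
  // Schematically (illustrative only), for a cross-module call the linker
  // rewrites
  //   bl callee          ->   bl callee_stub
  //   nop                ->   ld r2, <TOC save offset>(r1)
  // while a same-module call keeps the original bl/nop pair.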
  if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
      !IsPatchPoint) {
    if (CallOpc == PPCISD::BCTRL) {
      // This is a call through a function pointer.
      // Restore the caller TOC from the save area into R2.
      // See PrepareCall() for more information about calls through function
      // pointers in the 64-bit SVR4 ABI.
      // We are using a target-specific load with r2 hard coded, because the
      // result of a target-independent load would never go directly into r2,
      // since r2 is a reserved register (which prevents the register allocator
      // from allocating it), resulting in an additional register being
      // allocated and an unnecessary move instruction being generated.
      CallOpc = PPCISD::BCTRL_LOAD_TOC;

      EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
      SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);

      // The address needs to go after the chain input but before the flag (or
      // any other variadic arguments).
      Ops.insert(std::next(Ops.begin()), AddTOC);
    } else if ((CallOpc == PPCISD::CALL) &&
               (!isLocalCall(Callee) ||
                DAG.getTarget().getRelocationModel() == Reloc::PIC_))
      // Otherwise insert NOP for non-local calls.
      CallOpc = PPCISD::CALL_NOP;
  }

  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(BytesCalleePops, dl, true),
                             InFlag, dl);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
                         Ins, dl, DAG, InVals);
}
SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool isVarArg = CLI.IsVarArg;
  bool IsPatchPoint = CLI.IsPatchPoint;
  ImmutableCallSite *CS = CLI.CS;

  if (isTailCall)
    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
                                                   Ins, DAG);

  if (!isTailCall && CS && CS->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  if (Subtarget.isSVR4ABI()) {
    if (Subtarget.isPPC64())
      return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
                              isTailCall, IsPatchPoint, Outs, OutVals, Ins,
                              dl, DAG, InVals, CS);
    else
      return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
                              isTailCall, IsPatchPoint, Outs, OutVals, Ins,
                              dl, DAG, InVals, CS);
  }

  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
                          isTailCall, IsPatchPoint, Outs, OutVals, Ins,
                          dl, DAG, InVals, CS);
}
SDValue
PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
                                    CallingConv::ID CallConv, bool isVarArg,
                                    bool isTailCall, bool IsPatchPoint,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    SDLoc dl, SelectionDAG &DAG,
                                    SmallVectorImpl<SDValue> &InVals,
                                    ImmutableCallSite *CS) const {
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  unsigned PtrByteSize = 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamic
  // allocations and for restoring the caller's stack pointer in this
  // function's epilogue. This is done because by tail calling the called
  // function might overwrite the value in this function's (MF) stack pointer
  // stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
                       PtrByteSize);

  if (isVarArg) {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool Result;

      if (Outs[i].IsFixed) {
        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                               CCInfo);
      } else {
        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
                                      ArgFlags, CCInfo);
      }

      if (Result) {
#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
               << EVT(ArgVT).getEVTString() << "\n";
#endif
        llvm_unreachable(nullptr);
      }
    }
  } else {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
  }
  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);

  // Size of the linkage area, parameter list area and the part of the local
  // variable space which contains copies of aggregates which are passed by
  // value.
  unsigned NumBytes = CCByValInfo.getNextStackOffset();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                               dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
                                       dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4583 SmallVector<SDValue, 8> MemOpChains;
4585 bool seenFloatArg = false;
4586 // Walk the register/memloc assignments, inserting copies/loads.
4587 for (unsigned i = 0, j = 0, e = ArgLocs.size();
4590 CCValAssign &VA = ArgLocs[i];
4591 SDValue Arg = OutVals[i];
4592 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4594 if (Flags.isByVal()) {
4595 // Argument is an aggregate which is passed by value, thus we need to
4596 // create a copy of it in the local variable space of the current stack
4597 // frame (which is the stack frame of the caller) and pass the address of
4598 // this copy to the callee.
4599 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
4600 CCValAssign &ByValVA = ByValArgLocs[j++];
4601 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
4603 // Memory reserved in the local variable space of the callers stack frame.
4604 unsigned LocMemOffset = ByValVA.getLocMemOffset();
4606 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
4607 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
4610 // Create a copy of the argument in the local area of the current
4612 SDValue MemcpyCall =
4613 CreateCopyOfByValArgument(Arg, PtrOff,
4614 CallSeqStart.getNode()->getOperand(0),
4617 // This must go outside the CALLSEQ_START..END.
4618 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
4619 CallSeqStart.getNode()->getOperand(1),
4621 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
4622 NewCallSeqStart.getNode());
4623 Chain = CallSeqStart = NewCallSeqStart;
4625 // Pass the address of the aggregate copy on the stack either in a
4626 // physical register or in the parameter list area of the current stack
4627 // frame to the callee.
4631 if (VA.isRegLoc()) {
4632 if (Arg.getValueType() == MVT::i1)
4633 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
4635 seenFloatArg |= VA.getLocVT().isFloatingPoint();
4636 // Put argument in a physical register.
4637 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
4639 // Put argument in the parameter list area of the current stack frame.
4640 assert(VA.isMemLoc());
4641 unsigned LocMemOffset = VA.getLocMemOffset();
4644 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
4645 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
4648 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
4649 MachinePointerInfo(),
4652 // Calculate and remember argument location.
4653 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
4659 if (!MemOpChains.empty())
4660 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4662 // Build a sequence of copy-to-reg nodes chained together with token chain
4663 // and flag operands which copy the outgoing args into the appropriate regs.
4665 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4666 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4667 RegsToPass[i].second, InFlag);
4668 InFlag = Chain.getValue(1);
4671 // Set CR bit 6 to true if this is a vararg call with floating args passed in
4674 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
4675 SDValue Ops[] = { Chain, InFlag };
4677 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
4678 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
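// Why CR6 matters, sketched under the 32-bit SVR4 ABI's rules: for a call
// such as printf("%f", d), the callee's prologue only has to spill the FP
// argument registers for va_start if FP arguments actually arrived in them,
// and CR bit 6 is how the caller communicates that.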
4680 InFlag = Chain.getValue(1);
4681 }
4683 if (isTailCall)
4684 PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
4685 false, TailCallArguments);
4687 return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint,
4688 /* unused except on PPC64 ELFv1 */ false, DAG,
4689 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
4690 NumBytes, Ins, InVals, CS);
4691 }
4693 // Copy an argument into memory, being careful to do this outside the
4694 // call sequence for the call to which the argument belongs.
4695 SDValue
4696 PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
4697 SDValue CallSeqStart,
4698 ISD::ArgFlagsTy Flags,
4699 SelectionDAG &DAG,
4700 SDLoc dl) const {
4701 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
4702 CallSeqStart.getNode()->getOperand(0),
4703 Flags, DAG, dl);
4704 // The MEMCPY must go outside the CALLSEQ_START..END.
4705 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
4706 CallSeqStart.getNode()->getOperand(1),
4707 SDLoc(MemcpyCall));
4708 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
4709 NewCallSeqStart.getNode());
4710 return NewCallSeqStart;
4711 }
4713 SDValue
4714 PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
4715 CallingConv::ID CallConv, bool isVarArg,
4716 bool isTailCall, bool IsPatchPoint,
4717 const SmallVectorImpl<ISD::OutputArg> &Outs,
4718 const SmallVectorImpl<SDValue> &OutVals,
4719 const SmallVectorImpl<ISD::InputArg> &Ins,
4720 SDLoc dl, SelectionDAG &DAG,
4721 SmallVectorImpl<SDValue> &InVals,
4722 ImmutableCallSite *CS) const {
4724 bool isELFv2ABI = Subtarget.isELFv2ABI();
4725 bool isLittleEndian = Subtarget.isLittleEndian();
4726 unsigned NumOps = Outs.size();
4727 bool hasNest = false;
4729 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4730 unsigned PtrByteSize = 8;
4732 MachineFunction &MF = DAG.getMachineFunction();
4734 // Mark this function as potentially containing a function that contains a
4735 // tail call. As a consequence the frame pointer will be used for dynamic
4736 // allocation and for restoring the caller's stack pointer in this function's
4737 // epilogue. This is done because, by tail calling, the called function might
4738 // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
4739 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4740 CallConv == CallingConv::Fast)
4741 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
4743 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4744 "fastcc not supported on varargs functions");
4746 // Count how many bytes are to be pushed on the stack, including the linkage
4747 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
4748 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
4749 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
4750 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4751 unsigned NumBytes = LinkageSize;
4752 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4753 unsigned &QFPR_idx = FPR_idx;
4755 static const MCPhysReg GPR[] = {
4756 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4757 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4758 };
4759 static const MCPhysReg VR[] = {
4760 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4761 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4762 };
4763 static const MCPhysReg VSRH[] = {
4764 PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
4765 PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
4766 };
4768 const unsigned NumGPRs = array_lengthof(GPR);
4769 const unsigned NumFPRs = 13;
4770 const unsigned NumVRs = array_lengthof(VR);
4771 const unsigned NumQFPRs = NumFPRs;
4773 // When using the fast calling convention, we don't provide backing for
4774 // arguments that will be in registers.
4775 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
4777 // Add up all the space actually used.
4778 for (unsigned i = 0; i != NumOps; ++i) {
4779 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4780 EVT ArgVT = Outs[i].VT;
4781 EVT OrigVT = Outs[i].ArgVT;
4783 if (Flags.isNest())
4784 continue;
4786 if (CallConv == CallingConv::Fast) {
4787 if (Flags.isByVal())
4788 NumGPRsUsed += (Flags.getByValSize()+7)/8;
4789 else
4790 switch (ArgVT.getSimpleVT().SimpleTy) {
4791 default: llvm_unreachable("Unexpected ValueType for argument!");
4792 case MVT::i1:
4793 case MVT::i32:
4794 case MVT::i64:
4795 if (++NumGPRsUsed <= NumGPRs)
4796 continue;
4797 break;
4798 case MVT::v4i32:
4799 case MVT::v8i16:
4800 case MVT::v16i8:
4801 case MVT::v2f64:
4802 case MVT::v2i64:
4803 case MVT::v1i128:
4804 if (++NumVRsUsed <= NumVRs)
4805 continue;
4806 break;
4807 case MVT::v4f32:
4808 // When using QPX, this is handled like a FP register, otherwise, it
4809 // is an Altivec register.
4810 if (Subtarget.hasQPX()) {
4811 if (++NumFPRsUsed <= NumFPRs)
4812 continue;
4813 } else {
4814 if (++NumVRsUsed <= NumVRs)
4815 continue;
4816 }
4817 break;
4818 case MVT::f32:
4819 case MVT::f64:
4820 case MVT::v4f64: // QPX
4821 case MVT::v4i1: // QPX
4822 if (++NumFPRsUsed <= NumFPRs)
4823 continue;
4824 break;
4825 }
4826 }
4828 /* Respect alignment of argument on the stack. */
4829 unsigned Align =
4830 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4831 NumBytes = ((NumBytes + Align - 1) / Align) * Align;
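// Worked example of the round-up: NumBytes == 52 with Align == 16 gives
// ((52 + 15) / 16) * 16 == 64, the next 16-byte-aligned offset.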
4833 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4834 if (Flags.isInConsecutiveRegsLast())
4835 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4836 }
4838 unsigned NumBytesActuallyUsed = NumBytes;
4840 // The prolog code of the callee may store up to 8 GPR argument registers to
4841 // the stack, allowing va_start to index over them in memory if it is varargs.
4842 // Because we cannot tell if this is needed on the caller side, we have to
4843 // conservatively assume that it is needed. As such, make sure we have at
4844 // least enough stack space for the caller to store the 8 GPRs.
4845 // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
4846 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
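// e.g. on ELFv1 (LinkageSize == 48, PtrByteSize == 8) this floor is
// 48 + 64 == 112 bytes, even for calls that pass no stack arguments.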
4848 // Tail call needs the stack to be aligned.
4849 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4850 CallConv == CallingConv::Fast)
4851 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
4853 // Calculate by how many bytes the stack has to be adjusted in case of tail
4854 // call optimization.
4855 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
4857 // To protect arguments on the stack from being clobbered in a tail call,
4858 // force all the loads to happen before doing any other lowering.
4859 if (isTailCall)
4860 Chain = DAG.getStackArgumentTokenFactor(Chain);
4862 // Adjust the stack pointer for the new arguments...
4863 // These operations are automatically eliminated by the prolog/epilog pass
4864 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4865 dl);
4866 SDValue CallSeqStart = Chain;
4868 // Load the return address and frame pointer so they can be moved somewhere
4869 // else later.
4870 SDValue LROp, FPOp;
4871 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
4872 dl);
4874 // Set up a copy of the stack pointer for use loading and storing any
4875 // arguments that may not fit in the registers available for argument
4876 // passing.
4877 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4879 // Figure out which arguments are going to go in registers, and which in
4880 // memory. Also, if this is a vararg function, floating point operations
4881 // must be stored to our stack, and loaded into integer regs as well, if
4882 // any integer regs are available for argument passing.
4883 unsigned ArgOffset = LinkageSize;
4885 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
4886 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
4888 SmallVector<SDValue, 8> MemOpChains;
4889 for (unsigned i = 0; i != NumOps; ++i) {
4890 SDValue Arg = OutVals[i];
4891 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4892 EVT ArgVT = Outs[i].VT;
4893 EVT OrigVT = Outs[i].ArgVT;
4895 // PtrOff will be used to store the current argument to the stack if a
4896 // register cannot be found for it.
4897 SDValue PtrOff;
4899 // We re-align the argument offset for each argument, except when using the
4900 // fast calling convention, when we need to make sure we do that only when
4901 // we'll actually use a stack slot.
4902 auto ComputePtrOff = [&]() {
4903 /* Respect alignment of argument on the stack. */
4904 unsigned Align =
4905 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4906 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
4908 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
4910 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
4911 };
4913 if (CallConv != CallingConv::Fast) {
4914 ComputePtrOff();
4916 /* Compute GPR index associated with argument offset. */
4917 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4918 GPR_idx = std::min(GPR_idx, NumGPRs);
4919 }
4921 // Promote integers to 64-bit values.
4922 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
4923 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
4924 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
4925 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
4926 }
4928 // FIXME memcpy is used way more than necessary. Correctness first.
4929 // Note: "by value" is code for passing a structure by value, not
4930 // basic types.
4931 if (Flags.isByVal()) {
4932 // Note: Size includes alignment padding, so
4933 // struct x { short a; char b; }
4934 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
4935 // These are the proper values we need for right-justifying the
4936 // aggregate in a parameter register.
4937 unsigned Size = Flags.getByValSize();
4939 // An empty aggregate parameter takes up no storage and no
4940 // registers.
4941 if (Size == 0)
4942 continue;
4944 if (CallConv == CallingConv::Fast)
4945 ComputePtrOff();
4947 // All aggregates smaller than 8 bytes must be passed right-justified.
4948 if (Size==1 || Size==2 || Size==4) {
4949 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
4950 if (GPR_idx != NumGPRs) {
4951 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
4952 MachinePointerInfo(), VT,
4953 false, false, false, 0);
4954 MemOpChains.push_back(Load.getValue(1));
4955 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4957 ArgOffset += PtrByteSize;
4958 continue;
4959 }
4960 }
4962 if (GPR_idx == NumGPRs && Size < 8) {
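// Worked example (illustrative): a 3-byte aggregate on a big-endian target
// is copied to PtrOff + (8 - 3) == PtrOff + 5, so its bytes occupy the
// least-significant end of the parameter doubleword.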
4963 SDValue AddPtr = PtrOff;
4964 if (!isLittleEndian) {
4965 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
4966 PtrOff.getValueType());
4967 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
4968 }
4969 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
4970 CallSeqStart,
4971 Flags, DAG, dl);
4972 ArgOffset += PtrByteSize;
4973 continue;
4974 }
4975 // Copy entire object into memory. There are cases where gcc-generated
4976 // code assumes it is there, even if it could be put entirely into
4977 // registers. (This is not what the doc says.)
4979 // FIXME: The above statement is likely due to a misunderstanding of the
4980 // documents. All arguments must be copied into the parameter area BY
4981 // THE CALLEE in the event that the callee takes the address of any
4982 // formal argument. That has not yet been implemented. However, it is
4983 // reasonable to use the stack area as a staging area for the register
4984 // load.
4986 // Skip this for small aggregates, as we will use the same slot for a
4987 // right-justified copy, below.
4988 if (Size >= 8)
4989 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
4990 CallSeqStart,
4991 Flags, DAG, dl);
4993 // When a register is available, pass a small aggregate right-justified.
4994 if (Size < 8 && GPR_idx != NumGPRs) {
4995 // The easiest way to get this right-justified in a register
4996 // is to copy the structure into the rightmost portion of a
4997 // local variable slot, then load the whole slot into the
4998 // register.
4999 // FIXME: The memcpy seems to produce pretty awful code for
5000 // small aggregates, particularly for packed ones.
5001 // FIXME: It would be preferable to use the slot in the
5002 // parameter save area instead of a new local variable.
5003 SDValue AddPtr = PtrOff;
5004 if (!isLittleEndian) {
5005 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
5006 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5007 }
5008 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5009 CallSeqStart,
5010 Flags, DAG, dl);
5012 // Load the slot into the register.
5013 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
5014 MachinePointerInfo(),
5015 false, false, false, 0);
5016 MemOpChains.push_back(Load.getValue(1));
5017 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5019 // Done with this argument.
5020 ArgOffset += PtrByteSize;
5021 continue;
5022 }
5024 // For aggregates larger than PtrByteSize, copy the pieces of the
5025 // object that fit into registers from the parameter save area.
5026 for (unsigned j=0; j<Size; j+=PtrByteSize) {
5027 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
5028 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
5029 if (GPR_idx != NumGPRs) {
5030 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
5031 MachinePointerInfo(),
5032 false, false, false, 0);
5033 MemOpChains.push_back(Load.getValue(1));
5034 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5035 ArgOffset += PtrByteSize;
5036 } else {
5037 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
5038 break;
5039 }
5040 }
5041 continue;
5042 }
5044 switch (Arg.getSimpleValueType().SimpleTy) {
5045 default: llvm_unreachable("Unexpected ValueType for argument!");
5046 case MVT::i1:
5047 case MVT::i32:
5048 case MVT::i64:
5049 if (Flags.isNest()) {
5050 // The 'nest' parameter, if any, is passed in R11.
5051 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
5052 hasNest = true;
5053 break;
5054 }
5056 // These can be scalar arguments or elements of an integer array type
5057 // passed directly. Clang may use those instead of "byval" aggregate
5058 // types to avoid forcing arguments to memory unnecessarily.
5059 if (GPR_idx != NumGPRs) {
5060 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
5061 } else {
5062 if (CallConv == CallingConv::Fast)
5063 ComputePtrOff();
5065 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5066 true, isTailCall, false, MemOpChains,
5067 TailCallArguments, dl);
5068 if (CallConv == CallingConv::Fast)
5069 ArgOffset += PtrByteSize;
5070 }
5071 if (CallConv != CallingConv::Fast)
5072 ArgOffset += PtrByteSize;
5073 break;
5074 case MVT::f32:
5075 case MVT::f64: {
5076 // These can be scalar arguments or elements of a float array type
5077 // passed directly. The latter are used to implement ELFv2 homogeneous
5078 // float aggregates.
5080 // Named arguments go into FPRs first, and once they overflow, the
5081 // remaining arguments go into GPRs and then the parameter save area.
5082 // Unnamed arguments for vararg functions always go to GPRs and
5083 // then the parameter save area. For now, put all arguments to vararg
5084 // routines always in both locations (FPR *and* GPR or stack slot).
5085 bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
5086 bool NeededLoad = false;
5088 // First load the argument into the next available FPR.
5089 if (FPR_idx != NumFPRs)
5090 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
5092 // Next, load the argument into GPR or stack slot if needed.
5093 if (!NeedGPROrStack)
5094 ;
5095 else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
5096 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
5097 // once we support fp <-> gpr moves.
5099 // In the non-vararg case, this can only ever happen in the
5100 // presence of f32 array types, since otherwise we never run
5101 // out of FPRs before running out of GPRs.
5102 SDValue ArgVal;
5104 // Double values are always passed in a single GPR.
5105 if (Arg.getValueType() != MVT::f32) {
5106 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
5108 // Non-array float values are extended and passed in a GPR.
5109 } else if (!Flags.isInConsecutiveRegs()) {
5110 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5111 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
5113 // If we have an array of floats, we collect every odd element
5114 // together with its predecessor into one GPR.
5115 } else if (ArgOffset % PtrByteSize != 0) {
5116 SDValue Lo, Hi;
5117 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
5118 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5119 if (!isLittleEndian)
5120 std::swap(Lo, Hi);
5121 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
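// e.g. for a float pair a[0], a[1] sharing one GPR: both are bitcast to i32
// and paired into an i64; on big-endian the earlier element must end up in
// the most-significant half, hence the swap above.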
5123 // The final element, if even, goes into the first half of a GPR.
5124 } else if (Flags.isInConsecutiveRegsLast()) {
5125 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
5126 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
5127 if (!isLittleEndian)
5128 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
5129 DAG.getConstant(32, dl, MVT::i32));
5131 // Non-final even elements are skipped; they will be handled
5132 // together with the subsequent argument on the next go-around.
5133 } else
5134 ArgVal = SDValue();
5136 if (ArgVal.getNode())
5137 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
5138 } else {
5139 if (CallConv == CallingConv::Fast)
5140 ComputePtrOff();
5142 // Single-precision floating-point values are mapped to the
5143 // second (rightmost) word of the stack doubleword.
5144 if (Arg.getValueType() == MVT::f32 &&
5145 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
5146 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
5147 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
5150 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5151 true, isTailCall, false, MemOpChains,
5152 TailCallArguments, dl);
5154 NeededLoad = true;
5155 }
5156 // When passing an array of floats, the array occupies consecutive
5157 // space in the argument area; only round up to the next doubleword
5158 // at the end of the array. Otherwise, each float takes 8 bytes.
5159 if (CallConv != CallingConv::Fast || NeededLoad) {
5160 ArgOffset += (Arg.getValueType() == MVT::f32 &&
5161 Flags.isInConsecutiveRegs()) ? 4 : 8;
5162 if (Flags.isInConsecutiveRegsLast())
5163 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5164 }
5165 break;
5166 }
5167 case MVT::v4f32:
5168 case MVT::v4i32:
5169 case MVT::v8i16:
5170 case MVT::v16i8:
5171 case MVT::v2f64:
5172 case MVT::v2i64:
5173 case MVT::v1i128:
5174 if (!Subtarget.hasQPX()) {
5175 // These can be scalar arguments or elements of a vector array type
5176 // passed directly. The latter are used to implement ELFv2 homogeneous
5177 // vector aggregates.
5179 // For a varargs call, named arguments go into VRs or on the stack as
5180 // usual; unnamed arguments always go to the stack or the corresponding
5181 // GPRs when within range. For now, we always put the value in both
5182 // locations (or even all three).
5183 if (isVarArg) {
5184 // We could elide this store in the case where the object fits
5185 // entirely in R registers. Maybe later.
5186 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
5187 MachinePointerInfo(), false, false, 0);
5188 MemOpChains.push_back(Store);
5189 if (VR_idx != NumVRs) {
5190 SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
5191 MachinePointerInfo(),
5192 false, false, false, 0);
5193 MemOpChains.push_back(Load.getValue(1));
5195 unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
5196 Arg.getSimpleValueType() == MVT::v2i64) ?
5197 VSRH[VR_idx] : VR[VR_idx];
5198 ++VR_idx;
5200 RegsToPass.push_back(std::make_pair(VReg, Load));
5201 }
5203 for (unsigned i=0; i<16; i+=PtrByteSize) {
5204 if (GPR_idx == NumGPRs)
5205 break;
5206 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5207 DAG.getConstant(i, dl, PtrVT));
5208 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
5209 false, false, false, 0);
5210 MemOpChains.push_back(Load.getValue(1));
5211 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5212 }
5213 break;
5214 }
5216 // Non-varargs Altivec params go into VRs or on the stack.
5217 if (VR_idx != NumVRs) {
5218 unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
5219 Arg.getSimpleValueType() == MVT::v2i64) ?
5220 VSRH[VR_idx] : VR[VR_idx];
5221 ++VR_idx;
5223 RegsToPass.push_back(std::make_pair(VReg, Arg));
5224 } else {
5225 if (CallConv == CallingConv::Fast)
5226 ComputePtrOff();
5228 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5229 true, isTailCall, true, MemOpChains,
5230 TailCallArguments, dl);
5231 if (CallConv == CallingConv::Fast)
5232 ArgOffset += 16;
5233 }
5235 if (CallConv != CallingConv::Fast)
5236 ArgOffset += 16;
5237 break;
5238 } // not QPX
5240 assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
5241 "Invalid QPX parameter type");
5246 bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
5248 // We could elide this store in the case where the object fits
5249 // entirely in R registers. Maybe later.
5250 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
5251 MachinePointerInfo(), false, false, 0);
5252 MemOpChains.push_back(Store);
5253 if (QFPR_idx != NumQFPRs) {
5254 SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl,
5255 Store, PtrOff, MachinePointerInfo(),
5256 false, false, false, 0);
5257 MemOpChains.push_back(Load.getValue(1));
5258 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
5259 }
5260 ArgOffset += (IsF32 ? 16 : 32);
5261 for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
5262 if (GPR_idx == NumGPRs)
5263 break;
5264 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5265 DAG.getConstant(i, dl, PtrVT));
5266 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
5267 false, false, false, 0);
5268 MemOpChains.push_back(Load.getValue(1));
5269 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5270 }
5271 break;
5272 }
5274 // Non-varargs QPX params go into registers or on the stack.
5275 if (QFPR_idx != NumQFPRs) {
5276 RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
5277 } else {
5278 if (CallConv == CallingConv::Fast)
5279 ComputePtrOff();
5281 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5282 true, isTailCall, true, MemOpChains,
5283 TailCallArguments, dl);
5284 if (CallConv == CallingConv::Fast)
5285 ArgOffset += (IsF32 ? 16 : 32);
5286 }
5288 if (CallConv != CallingConv::Fast)
5289 ArgOffset += (IsF32 ? 16 : 32);
5290 break;
5291 }
5292 }
5293 }
5295 assert(NumBytesActuallyUsed == ArgOffset);
5296 (void)NumBytesActuallyUsed;
5298 if (!MemOpChains.empty())
5299 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5301 // Check if this is an indirect call (MTCTR/BCTRL).
5302 // See PrepareCall() for more information about calls through function
5303 // pointers in the 64-bit SVR4 ABI.
5304 if (!isTailCall && !IsPatchPoint &&
5305 !isFunctionGlobalAddress(Callee) &&
5306 !isa<ExternalSymbolSDNode>(Callee)) {
5307 // Load r2 into a virtual register and store it to the TOC save area.
5308 setUsesTOCBasePtr(DAG);
5309 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
5310 // TOC save area offset.
5311 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5312 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5313 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5314 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
5315 MachinePointerInfo::getStack(TOCSaveOffset),
5316 false, false, 0);
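// The slot written here is the linkage-area TOC save word (offset 40 on
// ELFv1, 24 on ELFv2, assuming the usual frame layouts), from which r2 is
// restored after the indirect call returns.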
5317 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
5318 // This does not mean the MTCTR instruction must use R12; it's easier
5319 // to model this as an extra parameter, so do that.
5320 if (isELFv2ABI && !IsPatchPoint)
5321 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
5322 }
5324 // Build a sequence of copy-to-reg nodes chained together with token chain
5325 // and flag operands which copy the outgoing args into the appropriate regs.
5326 SDValue InFlag;
5327 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5328 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5329 RegsToPass[i].second, InFlag);
5330 InFlag = Chain.getValue(1);
5331 }
5333 if (isTailCall)
5334 PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
5335 FPOp, true, TailCallArguments);
5337 return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint,
5338 hasNest, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
5339 Callee, SPDiff, NumBytes, Ins, InVals, CS);
5340 }
5342 SDValue
5343 PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
5344 CallingConv::ID CallConv, bool isVarArg,
5345 bool isTailCall, bool IsPatchPoint,
5346 const SmallVectorImpl<ISD::OutputArg> &Outs,
5347 const SmallVectorImpl<SDValue> &OutVals,
5348 const SmallVectorImpl<ISD::InputArg> &Ins,
5349 SDLoc dl, SelectionDAG &DAG,
5350 SmallVectorImpl<SDValue> &InVals,
5351 ImmutableCallSite *CS) const {
5353 unsigned NumOps = Outs.size();
5355 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5356 bool isPPC64 = PtrVT == MVT::i64;
5357 unsigned PtrByteSize = isPPC64 ? 8 : 4;
5359 MachineFunction &MF = DAG.getMachineFunction();
5361 // Mark this function as potentially containing a function that contains a
5362 // tail call. As a consequence the frame pointer will be used for dynamic
5363 // allocation and for restoring the caller's stack pointer in this function's
5364 // epilogue. This is done because, by tail calling, the called function might
5365 // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
5366 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5367 CallConv == CallingConv::Fast)
5368 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5370 // Count how many bytes are to be pushed on the stack, including the linkage
5371 // area, and parameter passing area. We start with 24/48 bytes, which is
5372 // prereserved space for [SP][CR][LR][3 x unused].
5373 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5374 unsigned NumBytes = LinkageSize;
5376 // Add up all the space actually used.
5377 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
5378 // they all go in registers, but we must reserve stack space for them for
5379 // possible use by the caller. In varargs or 64-bit calls, parameters are
5380 // assigned stack space in order, with padding so Altivec parameters are
5381 // 16-byte aligned.
5382 unsigned nAltivecParamsAtEnd = 0;
5383 for (unsigned i = 0; i != NumOps; ++i) {
5384 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5385 EVT ArgVT = Outs[i].VT;
5386 // Varargs Altivec parameters are padded to a 16 byte boundary.
5387 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
5388 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
5389 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
5390 if (!isVarArg && !isPPC64) {
5391 // Non-varargs Altivec parameters go after all the non-Altivec
5392 // parameters; handle those later so we know how much padding we need.
5393 nAltivecParamsAtEnd++;
5394 continue;
5395 }
5396 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
5397 NumBytes = ((NumBytes+15)/16)*16;
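// e.g. NumBytes == 40 rounds to ((40 + 15) / 16) * 16 == 48, the next
// 16-byte boundary required for an Altivec parameter.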
5399 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5400 }
5402 // Allow for Altivec parameters at the end, if needed.
5403 if (nAltivecParamsAtEnd) {
5404 NumBytes = ((NumBytes+15)/16)*16;
5405 NumBytes += 16*nAltivecParamsAtEnd;
5406 }
5408 // The prolog code of the callee may store up to 8 GPR argument registers to
5409 // the stack, allowing va_start to index over them in memory if it is varargs.
5410 // Because we cannot tell if this is needed on the caller side, we have to
5411 // conservatively assume that it is needed. As such, make sure we have at
5412 // least enough stack space for the caller to store the 8 GPRs.
5413 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
5415 // Tail call needs the stack to be aligned.
5416 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5417 CallConv == CallingConv::Fast)
5418 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5420 // Calculate by how many bytes the stack has to be adjusted in case of tail
5421 // call optimization.
5422 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
5424 // To protect arguments on the stack from being clobbered in a tail call,
5425 // force all the loads to happen before doing any other lowering.
5426 if (isTailCall)
5427 Chain = DAG.getStackArgumentTokenFactor(Chain);
5429 // Adjust the stack pointer for the new arguments...
5430 // These operations are automatically eliminated by the prolog/epilog pass
5431 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5432 dl);
5433 SDValue CallSeqStart = Chain;
5435 // Load the return address and frame pointer so they can be moved somewhere
5436 // else later.
5437 SDValue LROp, FPOp;
5438 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
5439 dl);
5441 // Set up a copy of the stack pointer for use loading and storing any
5442 // arguments that may not fit in the registers available for argument
5443 // passing.
5444 SDValue StackPtr;
5445 if (isPPC64)
5446 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5447 else
5448 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5450 // Figure out which arguments are going to go in registers, and which in
5451 // memory. Also, if this is a vararg function, floating point operations
5452 // must be stored to our stack, and loaded into integer regs as well, if
5453 // any integer regs are available for argument passing.
5454 unsigned ArgOffset = LinkageSize;
5455 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5457 static const MCPhysReg GPR_32[] = { // 32-bit registers.
5458 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
5459 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
5460 };
5461 static const MCPhysReg GPR_64[] = { // 64-bit registers.
5462 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5463 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5464 };
5465 static const MCPhysReg VR[] = {
5466 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5467 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5468 };
5469 const unsigned NumGPRs = array_lengthof(GPR_32);
5470 const unsigned NumFPRs = 13;
5471 const unsigned NumVRs = array_lengthof(VR);
5473 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
5475 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5476 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5478 SmallVector<SDValue, 8> MemOpChains;
5479 for (unsigned i = 0; i != NumOps; ++i) {
5480 SDValue Arg = OutVals[i];
5481 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5483 // PtrOff will be used to store the current argument to the stack if a
5484 // register cannot be found for it.
5485 SDValue PtrOff;
5487 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5489 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5491 // On PPC64, promote integers to 64-bit values.
5492 if (isPPC64 && Arg.getValueType() == MVT::i32) {
5493 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5494 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5495 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5496 }
5498 // FIXME memcpy is used way more than necessary. Correctness first.
5499 // Note: "by value" is code for passing a structure by value, not
5500 // basic types.
5501 if (Flags.isByVal()) {
5502 unsigned Size = Flags.getByValSize();
5503 // Very small objects are passed right-justified. Everything else is
5504 // passed left-justified.
5505 if (Size==1 || Size==2) {
5506 EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
5507 if (GPR_idx != NumGPRs) {
5508 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5509 MachinePointerInfo(), VT,
5510 false, false, false, 0);
5511 MemOpChains.push_back(Load.getValue(1));
5512 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5514 ArgOffset += PtrByteSize;
5515 } else {
5516 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5517 PtrOff.getValueType());
5518 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5519 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
5520 CallSeqStart,
5521 Flags, DAG, dl);
5522 ArgOffset += PtrByteSize;
5523 }
5524 continue;
5525 }
5526 // Copy entire object into memory. There are cases where gcc-generated
5527 // code assumes it is there, even if it could be put entirely into
5528 // registers. (This is not what the doc says.)
5529 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
5530 CallSeqStart,
5531 Flags, DAG, dl);
5533 // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
5534 // copy the pieces of the object that fit into registers from the
5535 // parameter save area.
5536 for (unsigned j=0; j<Size; j+=PtrByteSize) {
5537 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
5538 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
5539 if (GPR_idx != NumGPRs) {
5540 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
5541 MachinePointerInfo(),
5542 false, false, false, 0);
5543 MemOpChains.push_back(Load.getValue(1));
5544 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5545 ArgOffset += PtrByteSize;
5546 } else {
5547 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
5548 break;
5549 }
5550 }
5551 continue;
5552 }
5554 switch (Arg.getSimpleValueType().SimpleTy) {
5555 default: llvm_unreachable("Unexpected ValueType for argument!");
5556 case MVT::i1:
5557 case MVT::i32:
5558 case MVT::i64:
5559 if (GPR_idx != NumGPRs) {
5560 if (Arg.getValueType() == MVT::i1)
5561 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
5563 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
5564 } else {
5565 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5566 isPPC64, isTailCall, false, MemOpChains,
5567 TailCallArguments, dl);
5568 }
5569 ArgOffset += PtrByteSize;
5570 break;
5571 case MVT::f32:
5572 case MVT::f64:
5573 if (FPR_idx != NumFPRs) {
5574 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
5576 if (isVarArg) {
5577 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
5578 MachinePointerInfo(), false, false, 0);
5579 MemOpChains.push_back(Store);
5581 // Float varargs are always shadowed in available integer registers
5582 if (GPR_idx != NumGPRs) {
5583 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
5584 MachinePointerInfo(), false, false,
5585 false, 0);
5586 MemOpChains.push_back(Load.getValue(1));
5587 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5588 }
5589 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
5590 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
5591 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
5592 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
5593 MachinePointerInfo(),
5594 false, false, false, 0);
5595 MemOpChains.push_back(Load.getValue(1));
5596 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5597 }
5598 } else {
5599 // If we have any FPRs remaining, we may also have GPRs remaining.
5600 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
5601 // GPRs.
5602 if (GPR_idx != NumGPRs)
5603 ++GPR_idx;
5604 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
5605 !isPPC64) // PPC64 has 64-bit GPR's obviously :)
5606 ++GPR_idx;
5607 }
5608 } else
5609 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5610 isPPC64, isTailCall, false, MemOpChains,
5611 TailCallArguments, dl);
5612 if (isPPC64)
5613 ArgOffset += 8;
5614 else
5615 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
5616 break;
5617 case MVT::v4f32:
5618 case MVT::v4i32:
5619 case MVT::v8i16:
5620 case MVT::v16i8:
5621 if (isVarArg) {
5622 // These go aligned on the stack, or in the corresponding R registers
5623 // when within range. The Darwin PPC ABI doc claims they also go in
5624 // V registers; in fact gcc does this only for arguments that are
5625 // prototyped, not for those that match the ... We do it for all
5626 // arguments, seems to work.
5627 while (ArgOffset % 16 !=0) {
5628 ArgOffset += PtrByteSize;
5629 if (GPR_idx != NumGPRs)
5630 GPR_idx++;
5631 }
5632 // We could elide this store in the case where the object fits
5633 // entirely in R registers. Maybe later.
5634 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5635 DAG.getConstant(ArgOffset, dl, PtrVT));
5636 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
5637 MachinePointerInfo(), false, false, 0);
5638 MemOpChains.push_back(Store);
5639 if (VR_idx != NumVRs) {
5640 SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
5641 MachinePointerInfo(),
5642 false, false, false, 0);
5643 MemOpChains.push_back(Load.getValue(1));
5644 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
5645 }
5647 for (unsigned i=0; i<16; i+=PtrByteSize) {
5648 if (GPR_idx == NumGPRs)
5649 break;
5650 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5651 DAG.getConstant(i, dl, PtrVT));
5652 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
5653 false, false, false, 0);
5654 MemOpChains.push_back(Load.getValue(1));
5655 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5656 }
5657 break;
5658 }
5660 // Non-varargs Altivec params generally go in registers, but have
5661 // stack space allocated at the end.
5662 if (VR_idx != NumVRs) {
5663 // Doesn't have GPR space allocated.
5664 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
5665 } else if (nAltivecParamsAtEnd==0) {
5666 // We are emitting Altivec params in order.
5667 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5668 isPPC64, isTailCall, true, MemOpChains,
5669 TailCallArguments, dl);
5670 ArgOffset += 16;
5671 }
5672 break;
5673 }
5674 }
5675 // If all Altivec parameters fit in registers, as they usually do,
5676 // they get stack space following the non-Altivec parameters. We
5677 // don't track this here because nobody below needs it.
5678 // If there are more Altivec parameters than fit in registers emit
5679 // the stores here.
5680 if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
5681 unsigned j = 0;
5682 // Offset is aligned; skip 1st 12 params which go in V registers.
5683 ArgOffset = ((ArgOffset+15)/16)*16;
5684 ArgOffset += 12*16;
5685 for (unsigned i = 0; i != NumOps; ++i) {
5686 SDValue Arg = OutVals[i];
5687 EVT ArgType = Outs[i].VT;
5688 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
5689 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
5690 if (++j > NumVRs) {
5691 SDValue PtrOff;
5692 // We are emitting Altivec params in order.
5693 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5694 isPPC64, isTailCall, true, MemOpChains,
5695 TailCallArguments, dl);
5696 ArgOffset += 16;
5697 }
5698 }
5699 }
5700 }
5702 if (!MemOpChains.empty())
5703 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5705 // On Darwin, R12 must contain the address of an indirect callee. This does
5706 // not mean the MTCTR instruction must use R12; it's easier to model this as
5707 // an extra parameter, so do that.
5708 if (!isTailCall &&
5709 !isFunctionGlobalAddress(Callee) &&
5710 !isa<ExternalSymbolSDNode>(Callee) &&
5711 !isBLACompatibleAddress(Callee, DAG))
5712 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
5713 PPC::R12), Callee));
5715 // Build a sequence of copy-to-reg nodes chained together with token chain
5716 // and flag operands which copy the outgoing args into the appropriate regs.
5717 SDValue InFlag;
5718 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5719 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5720 RegsToPass[i].second, InFlag);
5721 InFlag = Chain.getValue(1);
5722 }
5724 if (isTailCall)
5725 PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
5726 FPOp, true, TailCallArguments);
5728 return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint,
5729 /* unused except on PPC64 ELFv1 */ false, DAG,
5730 RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
5731 NumBytes, Ins, InVals, CS);
5732 }
5734 bool
5735 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
5736 MachineFunction &MF, bool isVarArg,
5737 const SmallVectorImpl<ISD::OutputArg> &Outs,
5738 LLVMContext &Context) const {
5739 SmallVector<CCValAssign, 16> RVLocs;
5740 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
5741 return CCInfo.CheckReturn(Outs, RetCC_PPC);
5742 }
5744 SDValue
5745 PPCTargetLowering::LowerReturn(SDValue Chain,
5746 CallingConv::ID CallConv, bool isVarArg,
5747 const SmallVectorImpl<ISD::OutputArg> &Outs,
5748 const SmallVectorImpl<SDValue> &OutVals,
5749 SDLoc dl, SelectionDAG &DAG) const {
5751 SmallVector<CCValAssign, 16> RVLocs;
5752 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5753 *DAG.getContext());
5754 CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
5756 SDValue Flag;
5757 SmallVector<SDValue, 4> RetOps(1, Chain);
5759 // Copy the result values into the output registers.
5760 for (unsigned i = 0; i != RVLocs.size(); ++i) {
5761 CCValAssign &VA = RVLocs[i];
5762 assert(VA.isRegLoc() && "Can only return in registers!");
5764 SDValue Arg = OutVals[i];
5766 switch (VA.getLocInfo()) {
5767 default: llvm_unreachable("Unknown loc info!");
5768 case CCValAssign::Full: break;
5769 case CCValAssign::AExt:
5770 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
5771 break;
5772 case CCValAssign::ZExt:
5773 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
5774 break;
5775 case CCValAssign::SExt:
5776 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
5777 break;
5778 }
5780 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
5781 Flag = Chain.getValue(1);
5782 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
5783 }
5785 RetOps[0] = Chain; // Update chain.
5787 // Add the flag if we have it.
5788 if (Flag.getNode())
5789 RetOps.push_back(Flag);
5791 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
5792 }
5794 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
5795 const PPCSubtarget &Subtarget) const {
5796 // When we pop the dynamic allocation we need to restore the SP link.
5797 SDLoc dl(Op);
5799 // Get the correct type for pointers.
5800 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5802 // Construct the stack pointer operand.
5803 bool isPPC64 = Subtarget.isPPC64();
5804 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
5805 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
5807 // Get the operands for the STACKRESTORE.
5808 SDValue Chain = Op.getOperand(0);
5809 SDValue SaveSP = Op.getOperand(1);
5811 // Load the old link SP.
5812 SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
5813 MachinePointerInfo(),
5814 false, false, false, 0);
5816 // Restore the stack pointer.
5817 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
5819 // Store the old link SP.
5820 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
5821 false, false, 0);
5822 }
5826 SDValue
5827 PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
5828 MachineFunction &MF = DAG.getMachineFunction();
5829 bool isPPC64 = Subtarget.isPPC64();
5830 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
5832 // Get current frame pointer save index. The users of this index will be
5833 // primarily DYNALLOC instructions.
5834 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
5835 int RASI = FI->getReturnAddrSaveIndex();
5837 // If the return address save index hasn't been defined yet,
5838 if (!RASI) {
5839 // find out the fixed offset of the return address save area.
5840 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
5841 // Allocate the frame index for frame pointer save area.
5842 RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
5844 FI->setReturnAddrSaveIndex(RASI);
5845 }
5846 return DAG.getFrameIndex(RASI, PtrVT);
5847 }
5849 SDValue
5850 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
5851 MachineFunction &MF = DAG.getMachineFunction();
5852 bool isPPC64 = Subtarget.isPPC64();
5853 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
5855 // Get current frame pointer save index. The users of this index will be
5856 // primarily DYNALLOC instructions.
5857 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
5858 int FPSI = FI->getFramePointerSaveIndex();
5860 // If the frame pointer save index hasn't been defined yet,
5861 if (!FPSI) {
5862 // find out the fixed offset of the frame pointer save area.
5863 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
5864 // Allocate the frame index for frame pointer save area.
5865 FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
5867 FI->setFramePointerSaveIndex(FPSI);
5868 }
5869 return DAG.getFrameIndex(FPSI, PtrVT);
5870 }
5872 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
5873 SelectionDAG &DAG,
5874 const PPCSubtarget &Subtarget) const {
5876 SDValue Chain = Op.getOperand(0);
5877 SDValue Size = Op.getOperand(1);
5878 SDLoc dl(Op);
5880 // Get the correct type for pointers.
5881 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5883 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
5884 DAG.getConstant(0, dl, PtrVT), Size);
5885 // Construct a node for the frame pointer save index.
5886 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
5887 // Build a DYNALLOC node.
5888 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
5889 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
5890 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
5891 }
5893 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
5894 SelectionDAG &DAG) const {
5895 SDLoc DL(Op);
5896 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
5897 DAG.getVTList(MVT::i32, MVT::Other),
5898 Op.getOperand(0), Op.getOperand(1));
5899 }
5901 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
5902 SelectionDAG &DAG) const {
5903 SDLoc DL(Op);
5904 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
5905 Op.getOperand(0), Op.getOperand(1));
5906 }
5908 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
5909 if (Op.getValueType().isVector())
5910 return LowerVectorLoad(Op, DAG);
5912 assert(Op.getValueType() == MVT::i1 &&
5913 "Custom lowering only for i1 loads");
5915 // First, load 8 bits into 32 bits, then truncate to 1 bit.
5917 SDLoc dl(Op);
5918 LoadSDNode *LD = cast<LoadSDNode>(Op);
5920 SDValue Chain = LD->getChain();
5921 SDValue BasePtr = LD->getBasePtr();
5922 MachineMemOperand *MMO = LD->getMemOperand();
5924 SDValue NewLD =
5925 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
5926 BasePtr, MVT::i8, MMO);
5927 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
5929 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
5930 return DAG.getMergeValues(Ops, dl);
5931 }
5933 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
5934 if (Op.getOperand(1).getValueType().isVector())
5935 return LowerVectorStore(Op, DAG);
5937 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
5938 "Custom lowering only for i1 stores");
5940 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
5942 SDLoc dl(Op);
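// Rough shape of the result (illustrative): store i1 %v, %p becomes
//   %z = zext %v to iN ; truncstore i8 %z, %p
// where iN is the pointer-sized integer type produced below.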
5943 StoreSDNode *ST = cast<StoreSDNode>(Op);
5945 SDValue Chain = ST->getChain();
5946 SDValue BasePtr = ST->getBasePtr();
5947 SDValue Value = ST->getValue();
5948 MachineMemOperand *MMO = ST->getMemOperand();
5950 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
5951 Value);
5952 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
5953 }
5955 // FIXME: Remove this once the ANDI glue bug is fixed:
5956 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
5957 assert(Op.getValueType() == MVT::i1 &&
5958 "Custom lowering only for i1 results");
5961 return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
5965 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
5967 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
5968 // Not FP? Not a fsel.
5969 if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
5970 !Op.getOperand(2).getValueType().isFloatingPoint())
5971 return Op;
5973 // We might be able to do better than this under some circumstances, but in
5974 // general, fsel-based lowering of select is a finite-math-only optimization.
5975 // For more information, see section F.3 of the 2.06 ISA specification.
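// As a reminder of the primitive being targeted: fsel FRT, FRA, FRC, FRB
// computes FRT = (FRA >= 0.0) ? FRC : FRB, treating -0.0 as +0.0 and any
// NaN as "not >= 0", which is why the NoInfs/NoNaNs checks below are
// required.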
5976 if (!DAG.getTarget().Options.NoInfsFPMath ||
5977 !DAG.getTarget().Options.NoNaNsFPMath)
5978 return Op;
5980 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5982 EVT ResVT = Op.getValueType();
5983 EVT CmpVT = Op.getOperand(0).getValueType();
5984 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
5985 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
5986 SDLoc dl(Op);
5988 // If the RHS of the comparison is a 0.0, we don't need to do the
5989 // subtraction at all.
5990 SDValue Sel1;
5991 if (isFloatingPointZero(RHS))
5992 switch (CC) {
5993 default: break; // SETUO etc aren't handled by fsel.
5994 case ISD::SETNE:
5995 std::swap(TV, FV);
5996 case ISD::SETEQ:
5997 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
5998 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
5999 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
6000 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
6001 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
6002 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6003 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
6004 case ISD::SETULT:
6005 case ISD::SETLT:
6006 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
6007 case ISD::SETOGE:
6008 case ISD::SETGE:
6009 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
6010 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6011 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
6012 case ISD::SETUGT:
6013 case ISD::SETGT:
6014 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
6015 case ISD::SETOLE:
6016 case ISD::SETLE:
6017 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
6018 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
6019 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6020 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
6021 }
6023 SDValue Cmp;
6024 switch (CC) {
6025 default: break; // SETUO etc aren't handled by fsel.
6026 case ISD::SETNE:
6027 std::swap(TV, FV);
6028 case ISD::SETEQ:
6029 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
6030 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
6031 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6032 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6033 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
6034 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
6035 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
6036 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
6037 case ISD::SETULT:
6038 case ISD::SETLT:
6039 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
6040 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
6041 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6042 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
6043 case ISD::SETOGE:
6044 case ISD::SETGE:
6045 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
6046 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
6047 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6048 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6049 case ISD::SETUGT:
6050 case ISD::SETGT:
6051 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
6052 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
6053 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6054 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
6055 case ISD::SETOLE:
6056 case ISD::SETLE:
6057 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
6058 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
6059 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
6060 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
6061 }
6062 return Op;
6063 }
6065 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
6066 SelectionDAG &DAG,
6067 SDLoc dl) const {
6068 assert(Op.getOperand(0).getValueType().isFloatingPoint());
6069 SDValue Src = Op.getOperand(0);
6070 if (Src.getValueType() == MVT::f32)
6071 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
6073 SDValue Tmp;
6074 switch (Op.getSimpleValueType().SimpleTy) {
6075 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
6076 case MVT::i32:
6077 Tmp = DAG.getNode(
6078 Op.getOpcode() == ISD::FP_TO_SINT
6079 ? PPCISD::FCTIWZ
6080 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
6081 dl, MVT::f64, Src);
6082 break;
6083 case MVT::i64:
6084 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
6085 "i64 FP_TO_UINT is supported only with FPCVT");
6086 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
6087 PPCISD::FCTIDUZ,
6088 dl, MVT::f64, Src);
6089 break;
6090 }
6092 // Convert the FP value to an int value through memory.
6093 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
6094 (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
6095 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
6096 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
6097 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
6099 // Emit a store to the stack slot.
6100 SDValue Chain;
6101 if (i32Stack) {
6102 MachineFunction &MF = DAG.getMachineFunction();
6103 MachineMemOperand *MMO =
6104 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
6105 SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
6106 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
6107 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
6108 } else
6109 Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
6110 MPI, false, false, 0);
6112 // Result is a load from the stack slot. If loading 4 bytes, make sure to
6113 // add in a bias.
6114 if (Op.getValueType() == MVT::i32 && !i32Stack) {
6115 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
6116 DAG.getConstant(4, dl, FIPtr.getValueType()));
6117 MPI = MPI.getWithOffset(4);
6118 }
6120 RLI.Chain = Chain;
6121 RLI.Ptr = FIPtr;
6122 RLI.MPI = MPI;
6123 }
6125 /// \brief Custom lowers floating point to integer conversions to use
6126 /// the direct move instructions available in ISA 2.07 to avoid the
6127 /// need for load/store combinations.
6128 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
6129 SelectionDAG &DAG,
6130 SDLoc dl) const {
6131 assert(Op.getOperand(0).getValueType().isFloatingPoint());
6132 SDValue Src = Op.getOperand(0);
6134 if (Src.getValueType() == MVT::f32)
6135 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
6137 SDValue Tmp;
6138 switch (Op.getSimpleValueType().SimpleTy) {
6139 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
6140 case MVT::i32:
6141 Tmp = DAG.getNode(
6142 Op.getOpcode() == ISD::FP_TO_SINT
6143 ? PPCISD::FCTIWZ
6144 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
6145 dl, MVT::f64, Src);
6146 Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
6147 break;
6148 case MVT::i64:
6149 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
6150 "i64 FP_TO_UINT is supported only with FPCVT");
6151 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
6152 PPCISD::FCTIDUZ,
6153 dl, MVT::f64, Src);
6154 Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
6155 break;
6156 }
6157 return Tmp;
6158 }
6160 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
6161 SDLoc dl) const {
6162 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
6163 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
6165 ReuseLoadInfo RLI;
6166 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
6168 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
6169 false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
6170 RLI.Ranges);
6171 }
6173 // We're trying to insert a regular store, S, and then a load, L. If the
6174 // incoming value, O, is a load, we might just be able to have our load use the
6175 // address used by O. However, we don't know if anything else will store to
6176 // that address before we can load from it. To prevent this situation, we need
6177 // to insert our load, L, into the chain as a peer of O. To do this, we give L
6178 // the same chain operand as O, we create a token factor from the chain results
6179 // of O and L, and we replace all uses of O's chain result with that token
6180 // factor (see spliceIntoChain below for this last part).
6181 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
6184 ISD::LoadExtType ET) const {
6185 SDLoc dl(Op);
6186 if (ET == ISD::NON_EXTLOAD &&
6187 (Op.getOpcode() == ISD::FP_TO_UINT ||
6188 Op.getOpcode() == ISD::FP_TO_SINT) &&
6189 isOperationLegalOrCustom(Op.getOpcode(),
6190 Op.getOperand(0).getValueType())) {
6192 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
6193 return true;
6194 }
6196 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
6197 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
6198 LD->isNonTemporal())
6199 return false;
6200 if (LD->getMemoryVT() != MemVT)
6201 return false;
6203 RLI.Ptr = LD->getBasePtr();
6204 if (LD->isIndexed() && LD->getOffset().getOpcode() != ISD::UNDEF) {
6205 assert(LD->getAddressingMode() == ISD::PRE_INC &&
6206 "Non-pre-inc AM on PPC?");
6207 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
6208 LD->getOffset());
6209 }
6211 RLI.Chain = LD->getChain();
6212 RLI.MPI = LD->getPointerInfo();
6213 RLI.IsInvariant = LD->isInvariant();
6214 RLI.Alignment = LD->getAlignment();
6215 RLI.AAInfo = LD->getAAInfo();
6216 RLI.Ranges = LD->getRanges();
6218 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
6220 return true;
6221 }
6222 // Given the head of the old chain, ResChain, insert a token factor containing
6223 // it and NewResChain, and make users of ResChain now be users of that token
6224 // factor.
6225 void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
6226 SDValue NewResChain,
6227 SelectionDAG &DAG) const {
6228 if (!ResChain)
6229 return;
6231 SDLoc dl(NewResChain);
6233 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
6234 NewResChain, DAG.getUNDEF(MVT::Other));
6235 assert(TF.getNode() != NewResChain.getNode() &&
6236 "A new TF really is required here");
6238 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
6239 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
6240 }
6242 /// \brief Custom lowers integer to floating point conversions to use
6243 /// the direct move instructions available in ISA 2.07 to avoid the
6244 /// need for load/store combinations.
6245 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
6246 SelectionDAG &DAG,
6247 SDLoc dl) const {
6248 assert((Op.getValueType() == MVT::f32 ||
6249 Op.getValueType() == MVT::f64) &&
6250 "Invalid floating point type as target of conversion");
6251 assert(Subtarget.hasFPCVT() &&
6252 "Int to FP conversions with direct moves require FPCVT");
6254 SDValue Src = Op.getOperand(0);
6255 bool SinglePrec = Op.getValueType() == MVT::f32;
6256 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
6257 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
6258 unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
6259 (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
6260 SDValue FP;
6261 if (WordInt) {
6262 FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
6263 dl, MVT::f64, Src);
6264 FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
6265 }
6266 else {
6267 FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
6268 FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
6269 }
6271 return FP;
6272 }
6274 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
6275 SelectionDAG &DAG) const {
6276 SDLoc dl(Op);
6278 if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
6279 if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
6280 return SDValue();
6282 SDValue Value = Op.getOperand(0);
6283 // The values are now known to be -1 (false) or 1 (true). To convert this
6284 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
6285 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
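// Sanity check of the identity: V == -1 gives 0.5*(-1) + 0.5 == 0.0 and
// V == 1 gives 0.5*1 + 0.5 == 1.0, exactly the 0/1 values wanted.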
6286 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);
6288 SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64);
6289 FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
6290 FPHalfs, FPHalfs, FPHalfs, FPHalfs);
6292 Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);
6294 if (Op.getValueType() != MVT::v4f64)
6295 Value = DAG.getNode(ISD::FP_ROUND, dl,
6296 Op.getValueType(), Value,
6297 DAG.getIntPtrConstant(1, dl));
6298 return Value;
6299 }
6301 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
6302 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
6303 return SDValue();
6305 if (Op.getOperand(0).getValueType() == MVT::i1)
6306 return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
6307 DAG.getConstantFP(1.0, dl, Op.getValueType()),
6308 DAG.getConstantFP(0.0, dl, Op.getValueType()));
6310 // If we have direct moves, we can do all the conversion, skip the store/load
6311 // however, without FPCVT we can't do most conversions.
6312 if (Subtarget.hasDirectMove() && Subtarget.isPPC64() && Subtarget.hasFPCVT())
6313 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
6315 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
6316 "UINT_TO_FP is supported only with FPCVT");
6318 // If we have FCFIDS, then use it when converting to single-precision.
6319 // Otherwise, convert to double-precision and then round.
6320 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
6321 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
6323 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
6325 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
6329 if (Op.getOperand(0).getValueType() == MVT::i64) {
6330 SDValue SINT = Op.getOperand(0);
6331 // When converting to single-precision, we actually need to convert
6332 // to double-precision first and then round to single-precision.
6333 // To avoid double-rounding effects during that operation, we have
6334 // to prepare the input operand. Bits that might be truncated when
6335 // converting to double-precision are replaced by a bit that won't
6336 // be lost at this stage, but is below the single-precision rounding
6339 // However, if -enable-unsafe-fp-math is in effect, accept double
6340 // rounding to avoid the extra overhead.
6341 if (Op.getValueType() == MVT::f32 &&
6342 !Subtarget.hasFPCVT() &&
6343 !DAG.getTarget().Options.UnsafeFPMath) {
6345 // Twiddle input to make sure the low 11 bits are zero. (If this
6346 // is the case, we are guaranteed the value will fit into the 53 bit
6347 // mantissa of an IEEE double-precision value without rounding.)
6348 // If any of those low 11 bits were not zero originally, make sure
6349 // bit 12 (value 2048) is set instead, so that the final rounding
6350 // to single-precision gets the correct result.
6351 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
6352 SINT, DAG.getConstant(2047, dl, MVT::i64));
6353 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
6354 Round, DAG.getConstant(2047, dl, MVT::i64));
6355 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
6356 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
6357 Round, DAG.getConstant(-2048, dl, MVT::i64));
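      // Worked example (a sketch): for SINT = 0x00100000000007FF the low 11
      // bits are nonzero, so the three steps above produce
      // 0x0010000000000800 -- bit 11 set, low 11 bits clear.  That value
      // converts to double-precision exactly, with the set bit acting as a
      // sticky bit below the single-precision rounding position.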
      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output.  Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already.  Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
                                 SINT, DAG.getConstant(53, dl, MVT::i32));
      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
                         Cond, DAG.getConstant(1, dl, MVT::i64));
      Cond = DAG.getSetCC(dl, MVT::i32,
                          Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);

      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
    }

    ReuseLoadInfo RLI;
    SDValue Bits;

    MachineFunction &MF = DAG.getMachineFunction();
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, false,
                         false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo,
                         RLI.Ranges);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (((Subtarget.hasLFIWAX() &&
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
                (Subtarget.hasFPCVT() &&
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
               SINT.getOperand(0).getValueType() == MVT::i32) {
      MachineFrameInfo *FrameInfo = MF.getFrameInfo();
      EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());

      int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store =
        DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
                     MachinePointerInfo::getFixedStack(FrameIdx),
                     false, false, 0);

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
      RLI.Alignment = 4;

      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
                                       PPCISD::LFIWZX : PPCISD::LFIWAX,
                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
    } else
      Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);

    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);

    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
    return FP;
  }

  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
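  // Roughly, the two i32 strategies below (a sketch, not the exact asm):
  //   with LFIWAX/FPCVT: store the word to a 4-byte slot, reload it into an
  //     FPR with lfiwax/lfiwzx, then fcfid[u][s];
  //   without (PPC64 only): extsw + std to an 8-byte slot, lfd, fcfid.
  // When the i32 already lives in memory, the store is elided entirely by
  // reusing that load's address (canReuseLoadAddress).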
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
                                            DAG))) {
      int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
                                   FIdx,
                                   MachinePointerInfo::getFixedStack(FrameIdx),
                                   false, false, 0);

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx);
      RLI.Alignment = 4;
    }

    MachineMemOperand *MMO =
      MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
                                   PPCISD::LFIWZX : PPCISD::LFIWAX,
                                 dl, DAG.getVTList(MVT::f64, MVT::Other),
                                 Ops, MVT::i32, MMO);
    if (ReusingLoad)
      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
                                Op.getOperand(0));

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
                                 MachinePointerInfo::getFixedStack(FrameIdx),
                                 false, false, 0);

    // Load the value as a double.
    Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
                     MachinePointerInfo::getFixedStack(FrameIdx),
                     false, false, false, 0);
  }

  // FCFID it and return it.
  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                     DAG.getIntPtrConstant(0, dl));
  return FP;
}
SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding mode is in bits 30:31 of the FPSCR, and has the following
   settings:
     00 Round to nearest
     01 Round to zero
     10 Round to +inf
     11 Round to -inf

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */
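  // Sanity check of the formula for all four encodings (a worked sketch):
  //   00: 0 ^ ((~0 & 3) >> 1) = 0 ^ 1 = 1   (round to nearest)
  //   01: 1 ^ ((~1 & 3) >> 1) = 1 ^ 1 = 0   (round to zero)
  //   10: 2 ^ ((~2 & 3) >> 1) = 2 ^ 0 = 2   (round to +inf)
  //   11: 3 ^ ((~3 & 3) >> 1) = 3 ^ 0 = 3   (round to -inf)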
  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());

  // Save FP Control Word to register
  EVT NodeTys[] = {
    MVT::f64,    // return register
    MVT::Glue    // unused in this context
  };
  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
                               StackSlot, MachinePointerInfo(), false, false, 0);

  // Load FP Control Word from low 32 bits of stack slot.
  SDValue Four = DAG.getConstant(4, dl, PtrVT);
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
                            false, false, false, 0);

  // Transform as necessary
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, dl, MVT::i32));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, dl, MVT::i32)),
                            DAG.getConstant(3, dl, MVT::i32)),
                DAG.getConstant(1, dl, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  return DAG.getNode((VT.getSizeInBits() < 16 ?
                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
}
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  SDLoc dl(Op);
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SHL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}
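// Scalar sketch of the expansion above, relying on the PPC semantics that
// PPCISD::SHL/SRL produce 0 for shift amounts in [BitWidth, 2*BitWidth):
//   OutLo = Lo << Amt;
//   OutHi = (Hi << Amt) | (Lo >> (BitWidth - Amt)) | (Lo << (Amt - BitWidth));
// For Amt < BitWidth the third term is 0; for Amt >= BitWidth the first two
// are, so the OR always combines the right contributions.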
SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}
SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRA!");

  // Expand into a bunch of logical ops, followed by a select_cc.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
                                  Tmp4, Tmp6, ISD::SETLE);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}
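// Scalar sketch of the SRA case: Tmp5 = Amt - BitWidth is positive only when
// Amt > BitWidth, so the SELECT_CC above picks
//   OutLo = (Lo >> Amt) | (Hi << (BitWidth - Amt))   when Amt <= BitWidth,
//   OutLo = Hi >> (Amt - BitWidth)  (sign-filling)   otherwise,
// while OutHi is always the sign-propagating Hi >> Amt.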
//===----------------------------------------------------------------------===//
// Vector related lowering.
//

/// BuildSplatI - Build a canonical splati of Val with an element size of
/// SplatSize.  Cast the result to VT.
static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
                           SelectionDAG &DAG, SDLoc dl) {
  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");

  static const MVT VTys[] = { // canonical VT to use for each size.
    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
  };

  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];

  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
  if (Val == -1)
    SplatSize = 1;

  EVT CanonicalVT = VTys[SplatSize-1];

  // Build a canonical splat for this value.
  SDValue Elt = DAG.getConstant(Val, dl, MVT::i32);
  SmallVector<SDValue, 8> Ops;
  Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
  SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops);
  return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
}
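// Usage sketch: BuildSplatI(-1, 4, MVT::v4i32, DAG, dl) hits the
// canonicalization above, so it builds a v16i8 splat of -1 (a single
// vspltisb -1) and bitcasts the result back to v4i32.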
/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op,
                                SelectionDAG &DAG, SDLoc dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), Op);
}

/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
                                SelectionDAG &DAG, SDLoc dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
}

/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
                                SDValue Op2, SelectionDAG &DAG,
                                SDLoc dl, EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
}
/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount.  The result has the specified value type.
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
                           EVT VT, SelectionDAG &DAG, SDLoc dl) {
  // Force LHS/RHS to be the right type.
  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);

  int Ops[16];
  for (unsigned i = 0; i != 16; ++i)
    Ops[i] = i + Amt;
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
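// For example (a sketch): BuildVSLDOI(T, T, 1, VT, DAG, dl) builds the byte
// shuffle mask <1, 2, ..., 16>, i.e. the 16-byte register shifted left by
// one byte with bytes of the second (here identical) input shifted in --
// exactly what vsldoi T, T, 1 computes.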
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");

  if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
    // We first build an i32 vector, load it into a QPX register,
    // then convert it to a floating-point vector and compare it
    // to a zero vector to get the boolean result.
    MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
    int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
    MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    assert(BVN->getNumOperands() == 4 &&
           "BUILD_VECTOR for v4i1 does not have 4 operands");

    bool IsConst = true;
    for (unsigned i = 0; i < 4; ++i) {
      if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue;
      if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
        IsConst = false;
        break;
      }
    }

    if (IsConst) {
      Constant *One =
        ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
      Constant *NegOne =
        ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);

      SmallVector<Constant*, 4> CV(4, NegOne);
      for (unsigned i = 0; i < 4; ++i) {
        if (BVN->getOperand(i).getOpcode() == ISD::UNDEF)
          CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
        else if (cast<ConstantSDNode>(BVN->getOperand(i))->
                   getConstantIntValue()->isZero())
          continue;
        else
          CV[i] = One;
      }

      Constant *CP = ConstantVector::get(CV);
      SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
                                          16 /* alignment */);

      SmallVector<SDValue, 2> Ops;
      Ops.push_back(DAG.getEntryNode());
      Ops.push_back(CPIdx);

      SmallVector<EVT, 2> ValueVTs;
      ValueVTs.push_back(MVT::v4i1);
      ValueVTs.push_back(MVT::Other); // chain
      SDVTList VTs = DAG.getVTList(ValueVTs);

      return DAG.getMemIntrinsicNode(PPCISD::QVLFSb,
                                     dl, VTs, Ops, MVT::v4f32,
                                     MachinePointerInfo::getConstantPool());
    }

    SmallVector<SDValue, 4> Stores;
    for (unsigned i = 0; i < 4; ++i) {
      if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) continue;

      unsigned Offset = 4*i;
      SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
      Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

      unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
      if (StoreSize > 4) {
        Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
                                           BVN->getOperand(i), Idx,
                                           PtrInfo.getWithOffset(Offset),
                                           MVT::i32, false, false, 0));
      } else {
        SDValue StoreValue = BVN->getOperand(i);
        if (StoreSize < 4)
          StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);

        Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
                                      StoreValue, Idx,
                                      PtrInfo.getWithOffset(Offset),
                                      false, false, 0));
      }
    }

    SDValue StoreChain;
    if (!Stores.empty())
      StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    else
      StoreChain = DAG.getEntryNode();

    // Now load from v4i32 into the QPX register; this will extend it to
    // v4i64 but not yet convert it to a floating-point vector. Nevertheless,
    // this is typed as v4f64 because the QPX register integer states are not
    // explicitly represented.

    SmallVector<SDValue, 2> Ops;
    Ops.push_back(StoreChain);
    Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32));
    Ops.push_back(FIdx);

    SmallVector<EVT, 2> ValueVTs;
    ValueVTs.push_back(MVT::v4f64);
    ValueVTs.push_back(MVT::Other); // chain
    SDVTList VTs = DAG.getVTList(ValueVTs);

    SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
                                                 dl, VTs, Ops, MVT::v4i32,
                                                 PtrInfo);
    LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
                             DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl,
                                             MVT::i32),
                             LoadedVect);

    SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::f64);
    FPZeros = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
                          FPZeros, FPZeros, FPZeros, FPZeros);

    return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
  }
  // All other QPX vectors are handled by generic code.
  if (Subtarget.hasQPX())
    return SDValue();

  // Check if this is a splat of a constant value.
  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                             HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
      SplatBitSize > 32)
    return SDValue();

  unsigned SplatBits = APSplatBits.getZExtValue();
  unsigned SplatUndef = APSplatUndef.getZExtValue();
  unsigned SplatSize = SplatBitSize / 8;

  // First, handle single instruction cases.

  // All zeros?
  if (SplatBits == 0) {
    // Canonicalize all zero vectors to be v4i32.
    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
      SDValue Z = DAG.getConstant(0, dl, MVT::i32);
      Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
      Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
    }
    return Op;
  }

  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
  int32_t SextVal = (int32_t(SplatBits << (32-SplatBitSize)) >>
                     (32-SplatBitSize));
  if (SextVal >= -16 && SextVal <= 15)
    return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);

  // Two instruction sequences.

  // If this value is in the range [-32,30] and is even, use:
  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
  // If this value is in the range [17,31] and is odd, use:
  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
  // If this value is in the range [-31,-17] and is odd, use:
  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
  // Note the last two are three-instruction sequences.
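  // For example (a sketch): a splat of 30 becomes vspltisw 15 followed by a
  // self-add (15+15 = 30), and a splat of 27 becomes vspltisw 11 minus
  // vspltisw -16, since 11 - (-16) = 27.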
  if (SextVal >= -32 && SextVal <= 31) {
    // To avoid having these optimizations undone by constant folding,
    // we convert to a pseudo that will be expanded later into one of
    // the above forms.
    SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
    EVT VT = (SplatSize == 1 ? MVT::v16i8 :
              (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
    SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
    SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
    if (VT == Op.getValueType())
      return RetVal;
    else
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
  }

  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
  // for fneg/fabs.
  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
    // Make -1 and vspltisw -1:
    SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);

    // Make the VSLW intrinsic, computing 0x8000_0000.
    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
                                   OnesV, DAG, dl);

    // xor by OnesV to invert it.
    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
  }

  // Check to see if this is a wide variety of vsplti*, binop self cases.
  static const signed char SplatCsts[] = {
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
  };

  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formation of 0x8000_0000).
    int i = SplatCsts[idx];

    // Figure out what shift amount will be used by altivec if shifted by i in
    // this splat size.
    unsigned TypeShiftAmt = i & (SplatBitSize-1);

    // vsplti + shl self.
    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
        Intrinsic::ppc_altivec_vslw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + srl self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
        Intrinsic::ppc_altivec_vsrw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + sra self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
        Intrinsic::ppc_altivec_vsraw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + rol self.
    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
        Intrinsic::ppc_altivec_vrlw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // t = vsplti c, result = vsldoi t, t, 1
    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 2
    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 3
    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
  }

  return SDValue();
}
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                      SDValue RHS, SelectionDAG &DAG,
                                      SDLoc dl) {
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);

  enum {
    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12
  };

  if (OpNum == OP_COPY) {
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  SDValue OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);

  int ShufIdxs[16];
  switch (OpNum) {
  default: llvm_unreachable("Unknown i32 permute!");
  case OP_VMRGHW:
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  case OP_VSLDOI4:
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI8:
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI12:
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
  }
  EVT VT = OpLHS.getValueType();
  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
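// PFEntry decoding sketch (mirroring the shifts above): bits 31:30 hold the
// cost, 29:26 the opcode, 25:13 the LHS index, and 12:0 the RHS index.  Each
// 13-bit index packs four base-9 digits, one per 4-byte element (0-7 for a
// source element, 8 for undef), so the identity <0,1,2,3> encodes as
// 0*729 + 1*81 + 2*9 + 3 = 102 == (1*9+2)*9+3, the OP_COPY check above.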
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
/// return the code it can be lowered into.  Worst case, it can always be
/// lowered into a vperm.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
  EVT VT = Op.getValueType();
  bool isLittleEndian = Subtarget.isLittleEndian();

  if (Subtarget.hasQPX()) {
    if (VT.getVectorNumElements() != 4)
      return SDValue();

    if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

    int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
    if (AlignIdx != -1) {
      return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
                         DAG.getConstant(AlignIdx, dl, MVT::i32));
    } else if (SVOp->isSplat()) {
      int SplatIdx = SVOp->getSplatIndex();
      if (SplatIdx >= 4) {
        std::swap(V1, V2);
        SplatIdx -= 4;
      }

      // FIXME: If SplatIdx == 0 and the input came from a load, then there is
      // nothing to do here.

      return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
                         DAG.getConstant(SplatIdx, dl, MVT::i32));
    }

    // Lower this into a qvgpci/qvfperm pair.

    // Compute the qvgpci literal
    unsigned idx = 0;
    for (unsigned i = 0; i < 4; ++i) {
      int m = SVOp->getMaskElt(i);
      unsigned mm = m >= 0 ? (unsigned) m : i;
      idx |= mm << (3-i)*3;
    }

    SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
                             DAG.getConstant(idx, dl, MVT::i32));
    return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
  }

  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
  // selected by the instruction selector.
  if (V2.getOpcode() == ISD::UNDEF) {
    if (PPC::isSplatShuffleMask(SVOp, 1) ||
        PPC::isSplatShuffleMask(SVOp, 2) ||
        PPC::isSplatShuffleMask(SVOp, 4) ||
        PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
        PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
        PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
        PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)) {
      return Op;
    }
  }

  // Altivec has a variety of "shuffle immediates" that take two vector inputs
  // and produce a fixed permutation.  If any of these match, do not lower to
  // VPERM.
  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
      PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
      PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))
    return Op;

  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
  // perfect shuffle table to emit an optimal matching sequence.
  ArrayRef<int> PermMask = SVOp->getMask();

  unsigned PFIndexes[4];
  bool isFourElementShuffle = true;
  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
    unsigned EltNo = 8;   // Start out undef.
    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
      if (PermMask[i*4+j] < 0)
        continue;   // Undef, ignore it.

      unsigned ByteSource = PermMask[i*4+j];
      if ((ByteSource & 3) != j) {
        isFourElementShuffle = false;
        break;
      }

      if (EltNo == 8) {
        EltNo = ByteSource/4;
      } else if (EltNo != ByteSource/4) {
        isFourElementShuffle = false;
        break;
      }
    }
    PFIndexes[i] = EltNo;
  }

  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
  // perfect shuffle vector to determine if it is cost effective to do this as
  // discrete instructions, or whether we should use a vperm.
  // For now, we skip this for little endian until such time as we have a
  // little-endian perfect shuffle table.
  if (isFourElementShuffle && !isLittleEndian) {
    // Compute the index in the perfect shuffle table.
    unsigned PFTableIndex =
      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];

    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    unsigned Cost = (PFEntry >> 30);

    // Determining when to avoid vperm is tricky.  Many things affect the cost
    // of vperm, particularly how many times the perm mask needs to be
    // computed.  For example, if the perm mask can be hoisted out of a loop
    // or is already used (perhaps because there are multiple permutes with
    // the same shuffle mask?) the vperm has a cost of 1.  OTOH, hoisting the
    // permute mask out of the loop requires an extra register.
    //
    // As a compromise, we only emit discrete instructions if the shuffle can
    // be generated in 3 or fewer operations.  When we have loop information
    // available, if this block is within a loop, we should avoid using vperm
    // for 3-operation perms and use a constant pool load instead.
    if (Cost < 3)
      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
  }

  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes.  Convert now.

  // For little endian, the order of the input vectors is reversed, and
  // the permutation mask is complemented with respect to 31.  This is
  // necessary to produce proper semantics with the big-endian-biased vperm
  // instruction.
  EVT EltVT = V1.getValueType().getVectorElementType();
  unsigned BytesPerElement = EltVT.getSizeInBits()/8;

  SmallVector<SDValue, 16> ResultMask;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];

    for (unsigned j = 0; j != BytesPerElement; ++j)
      if (isLittleEndian)
        ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
                                             dl, MVT::i32));
      else
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
                                             MVT::i32));
  }

  SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
                                  ResultMask);
  if (isLittleEndian)
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V2, V1, VPermMask);
  else
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V1, V2, VPermMask);
}
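// Little-endian sketch: with the inputs passed as (V2, V1) and every mask
// byte m replaced by 31 - m, the big-endian-biased vperm selects the same
// bytes that the original mask denotes under little-endian element
// numbering, which is why both the complement and the swap are needed.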
/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
/// altivec comparison.  If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
                                  bool &isDot, const PPCSubtarget &Subtarget) {
  unsigned IntrinsicID =
    cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
  CompareOpc = -1;
  isDot = false;
  switch (IntrinsicID) {
  default: return false;
  // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequd_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 199;
      isDot = 1;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsd_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 967;
      isDot = 1;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtud_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 711;
      isDot = 1;
    } else
      return false;
    break;

  // Normal Comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:  CompareOpc = 966; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequb: CompareOpc =   6; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequh: CompareOpc =  70; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequd:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 199;
      isDot = 0;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsd:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 967;
      isDot = 0;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtud:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 711;
      isDot = 0;
    } else
      return false;
    break;
  }
  return true;
}
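// Usage sketch (mirrors LowerINTRINSIC_WO_CHAIN below):
//   int CompareOpc; bool isDot;
//   if (getAltivecCompareInfo(Op, CompareOpc, isDot, Subtarget)) { ... }
// e.g. vcmpequw_p yields CompareOpc == 134 with isDot set, and plain
// vcmpequw yields the same opcode with isDot clear.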
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // If this is a lowered altivec predicate compare, CompareOpc is set to the
  // opcode number of the comparison.
  int CompareOpc;
  bool isDot;
  if (!getAltivecCompareInfo(Op, CompareOpc, isDot, Subtarget))
    return SDValue();    // Don't custom lower most intrinsics.

  // If this is a non-dot comparison, make the VCMP node and we are done.
  if (!isDot) {
    SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
                              Op.getOperand(1), Op.getOperand(2),
                              DAG.getConstant(CompareOpc, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
  }

  // Create the PPCISD altivec 'dot' comparison node.
  SDValue Ops[] = {
    Op.getOperand(2),  // LHS
    Op.getOperand(3),  // RHS
    DAG.getConstant(CompareOpc, dl, MVT::i32)
  };
  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

  // Now that we have the comparison, emit a copy from the CR to a GPR.
  // This is flagged to the above dot comparison.
  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
                              DAG.getRegister(PPC::CR6, MVT::i32),
                              CompNode.getValue(1));

  // Unpack the result based on how the target uses it.
  unsigned BitNo;   // Bit # of CR6.
  bool InvertBit;   // Invert result?
  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
  default:  // Can't happen, don't crash on invalid number though.
  case 0:   // Return the value of the EQ bit of CR6.
    BitNo = 0; InvertBit = false;
    break;
  case 1:   // Return the inverted value of the EQ bit of CR6.
    BitNo = 0; InvertBit = true;
    break;
  case 2:   // Return the value of the LT bit of CR6.
    BitNo = 2; InvertBit = false;
    break;
  case 3:   // Return the inverted value of the LT bit of CR6.
    BitNo = 2; InvertBit = true;
    break;
  }

  // Shift the bit into the low position.
  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
                      DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
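  // E.g. (a sketch of the arithmetic): BitNo == 0 shifts by 8 - 3 = 5 and
  // BitNo == 2 shifts by 8 - 1 = 7, moving the corresponding CR6 bit of the
  // MFOCRF result down to bit 0 before it is isolated below.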
  // Isolate the bit.
  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
                      DAG.getConstant(1, dl, MVT::i32));

  // If we are supposed to, toggle the bit.
  if (InvertBit)
    Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
                        DAG.getConstant(1, dl, MVT::i32));
  return Flags;
}
SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int
  // instructions), but for smaller types, we need to first extend up to
  // v2i32 before going further.
  if (Op.getValueType() == MVT::v2i64) {
    EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    if (ExtVT != MVT::v2i32) {
      Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
      Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
                       DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
                                        ExtVT.getVectorElementType(), 4)));
      Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
      Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
                       DAG.getValueType(MVT::v2i32));
    }

    return Op;
  }

  return SDValue();
}
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // Create a stack slot that is 16-byte aligned.
  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  // Store the input value into Value#0 of the stack slot.
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
                               Op.getOperand(0), FIdx, MachinePointerInfo(),
                               false, false, 0);
  // Load it out.
  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
                     false, false, false, 0);
}
SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDNode *N = Op.getNode();

  assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
         "Unknown extract_vector_elt type");

  SDValue Value = N->getOperand(0);

  // The first part of this is like the store lowering except that we don't
  // need to track the chain.

  // The values are now known to be -1 (false) or 1 (true). To convert this
  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by
  // 0.5). This can be done with an fma and the 0.5 constant:
  // (V+1.0)*0.5 = 0.5*V+0.5
  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
  // understand how to form the extending load.
  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64);
  FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
                        FPHalfs, FPHalfs, FPHalfs, FPHalfs);

  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

  // Now convert to an integer and store.
  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
                      DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl,
                                      MVT::i32),
                      Value);

  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDValue StoreChain = DAG.getEntryNode();
  SmallVector<SDValue, 2> Ops;
  Ops.push_back(StoreChain);
  Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32));
  Ops.push_back(Value);
  Ops.push_back(FIdx);

  SmallVector<EVT, 2> ValueVTs;
  ValueVTs.push_back(MVT::Other); // chain
  SDVTList VTs = DAG.getVTList(ValueVTs);

  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
                                       dl, VTs, Ops, MVT::v4i32, PtrInfo);

  // Extract the value requested.
  unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
  Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

  SDValue IntVal = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
                               PtrInfo.getWithOffset(Offset),
                               false, false, false, 0);

  if (!Subtarget.useCRBits())
    return IntVal;

  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
}
/// Lowering for QPX v4i1 loads
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  SDValue LoadChain = LN->getChain();
  SDValue BasePtr = LN->getBasePtr();

  if (Op.getValueType() == MVT::v4f64 ||
      Op.getValueType() == MVT::v4f32) {
    EVT MemVT = LN->getMemoryVT();
    unsigned Alignment = LN->getAlignment();

    // If this load is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    EVT ScalarVT = Op.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();

    SmallVector<SDValue, 8> Vals, LoadChains;
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      SDValue Load;
      if (ScalarVT != ScalarMemVT)
        Load =
          DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
                         BasePtr,
                         LN->getPointerInfo().getWithOffset(Idx*Stride),
                         ScalarMemVT, LN->isVolatile(), LN->isNonTemporal(),
                         LN->isInvariant(), MinAlign(Alignment, Idx*Stride),
                         LN->getAAInfo());
      else
        Load =
          DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
                      LN->getPointerInfo().getWithOffset(Idx*Stride),
                      LN->isVolatile(), LN->isNonTemporal(),
                      LN->isInvariant(), MinAlign(Alignment, Idx*Stride),
                      LN->getAAInfo());

      if (Idx == 0 && LN->isIndexed()) {
        assert(LN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector load");
        Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
                                  LN->getAddressingMode());
      }

      Vals.push_back(Load);
      LoadChains.push_back(Load.getValue(1));

      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
    }

    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
    SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
                                Op.getValueType(), Vals);

    if (LN->isIndexed()) {
      SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
      return DAG.getMergeValues(RetOps, dl);
    }

    SDValue RetOps[] = { Value, TF };
    return DAG.getMergeValues(RetOps, dl);
  }

  assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
  assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");

  // To lower v4i1 from a byte array, we load the byte elements of the
  // vector and then reuse the BUILD_VECTOR logic.

  SmallVector<SDValue, 4> VectElmts, VectElmtChains;
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    VectElmts.push_back(DAG.getExtLoad(ISD::EXTLOAD,
                                       dl, MVT::i32, LoadChain, Idx,
                                       LN->getPointerInfo().getWithOffset(i),
                                       MVT::i8 /* memory type */,
                                       LN->isVolatile(), LN->isNonTemporal(),
                                       LN->isInvariant(),
                                       1 /* alignment */, LN->getAAInfo()));
    VectElmtChains.push_back(VectElmts[i].getValue(1));
  }

  LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
  SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i1, VectElmts);

  SDValue RVals[] = { Value, LoadChain };
  return DAG.getMergeValues(RVals, dl);
}
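// For example (a sketch): an under-aligned v4f32 load is split by the code
// above into four scalar f32 loads at byte offsets 0, 4, 8, and 12; their
// chains are token-factored together and the four values are recombined
// with a BUILD_VECTOR.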
/// Lowering for QPX v4i1 stores
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  SDValue StoreChain = SN->getChain();
  SDValue BasePtr = SN->getBasePtr();
  SDValue Value = SN->getValue();

  if (Value.getValueType() == MVT::v4f64 ||
      Value.getValueType() == MVT::v4f32) {
    EVT MemVT = SN->getMemoryVT();
    unsigned Alignment = SN->getAlignment();

    // If this store is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    EVT ScalarVT = Value.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();

    SmallVector<SDValue, 8> Stores;
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      SDValue Ex = DAG.getNode(
          ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
          DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
      SDValue Store;
      if (ScalarVT != ScalarMemVT)
        Store =
          DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
                            SN->getPointerInfo().getWithOffset(Idx*Stride),
                            ScalarMemVT, SN->isVolatile(), SN->isNonTemporal(),
                            MinAlign(Alignment, Idx*Stride), SN->getAAInfo());
      else
        Store =
          DAG.getStore(StoreChain, dl, Ex, BasePtr,
                       SN->getPointerInfo().getWithOffset(Idx*Stride),
                       SN->isVolatile(), SN->isNonTemporal(),
                       MinAlign(Alignment, Idx*Stride), SN->getAAInfo());

      if (Idx == 0 && SN->isIndexed()) {
        assert(SN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector store");
        Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
                                    SN->getAddressingMode());
      }

      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
      Stores.push_back(Store);
    }

    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    if (SN->isIndexed()) {
      SDValue RetOps[] = { TF, Stores[0].getValue(1) };
      return DAG.getMergeValues(RetOps, dl);
    }

    return TF;
  }

  assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
  assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");

  // The values are now known to be -1 (false) or 1 (true). To convert this
  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by
  // 0.5). This can be done with an fma and the 0.5 constant:
  // (V+1.0)*0.5 = 0.5*V+0.5
  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
  // understand how to form the extending load.
  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64);
  FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64,
                        FPHalfs, FPHalfs, FPHalfs, FPHalfs);

  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

  // Now convert to an integer and store.
  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
                      DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl,
                                      MVT::i32),
                      Value);

  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SmallVector<SDValue, 2> Ops;
  Ops.push_back(StoreChain);
  Ops.push_back(DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32));
  Ops.push_back(Value);
  Ops.push_back(FIdx);

  SmallVector<EVT, 2> ValueVTs;
  ValueVTs.push_back(MVT::Other); // chain
  SDVTList VTs = DAG.getVTList(ValueVTs);

  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
                                       dl, VTs, Ops, MVT::v4i32, PtrInfo);

  // Move data into the byte array.
  SmallVector<SDValue, 4> Loads, LoadChains;
  for (unsigned i = 0; i < 4; ++i) {
    unsigned Offset = 4*i;
    SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

    Loads.push_back(DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
                                PtrInfo.getWithOffset(Offset),
                                false, false, false, 0));
    LoadChains.push_back(Loads[i].getValue(1));
  }

  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);

  SmallVector<SDValue, 4> Stores;
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    Stores.push_back(DAG.getTruncStore(StoreChain, dl, Loads[i], Idx,
                                       SN->getPointerInfo().getWithOffset(i),
                                       MVT::i8 /* memory type */,
                                       SN->isNonTemporal(), SN->isVolatile(),
                                       1 /* alignment */, SN->getAAInfo()));
  }

  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

  return StoreChain;
}
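// Sketch of the v4i1 store path above: the boolean vector is converted to a
// v4f64 of 0.0/1.0 values, qvfctiwu turns those into integers, qvstfiw
// spills them to a 16-byte scratch slot, and four i32 loads then feed four
// i8 truncating stores into the target byte array.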
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  if (Op.getValueType() == MVT::v4i32) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
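    // Decomposition sketch for this path: writing each 32-bit lane as
    // a = aHi*2^16 + aLo and b = bHi*2^16 + bLo, the product mod 2^32 is
    //   aLo*bLo + ((aHi*bLo + aLo*bHi) << 16);
    // vmulouh computes aLo*bLo, vmsumuhm of LHS against the halfword-swapped
    // RHS computes aHi*bLo + aLo*bHi, and vslw shifts that sum into place.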
    SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
    SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl); // +16 as shift amt.

    SDValue RHSSwap =   // = vrlw RHS, 16
      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);

    // Shrinkify inputs to v8i16.
    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
    RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                      LHS, RHS, DAG, dl, MVT::v4i32);

    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                      LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
                              Neg16, DAG, dl);
    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
  } else if (Op.getValueType() == MVT::v8i16) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);

    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
                            LHS, RHS, Zero, DAG, dl);
  } else if (Op.getValueType() == MVT::v16i8) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    bool isLittleEndian = Subtarget.isLittleEndian();

    // Multiply the even 8-bit parts, producing 16-bit sums.
    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                         LHS, RHS, DAG, dl, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit sums.
    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                        LHS, RHS, DAG, dl, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);

    // Merge the results together.  Because vmuleub and vmuloub are
    // instructions with a big-endian bias, we must reverse the
    // element numbering and reverse the meaning of "odd" and "even"
    // when generating little endian code.
    int Ops[16];
    for (unsigned i = 0; i != 8; ++i) {
      if (isLittleEndian) {
        Ops[i*2  ] = 2*i;
        Ops[i*2+1] = 2*i+16;
      } else {
        Ops[i*2  ] = 2*i+1;
        Ops[i*2+1] = 2*i+1+16;
      }
    }
    if (isLittleEndian)
      return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
    else
      return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
  } else {
    llvm_unreachable("Unknown mul to lower!");
  }
}
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
  case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG, Subtarget);

  case ISD::VAARG:
    return LowerVAARG(Op, DAG, Subtarget);

  case ISD::VACOPY:
    return LowerVACOPY(Op, DAG, Subtarget);

  case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG, Subtarget);
  case ISD::DYNAMIC_STACKALLOC:
    return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget);

  case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);

  case ISD::LOAD:               return LowerLOAD(Op, DAG);
  case ISD::STORE:              return LowerSTORE(Op, DAG);
  case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
                                                      SDLoc(Op));
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);

  // Lower 64-bit shifts.
  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::SIGN_EXTEND_INREG:  return LowerSIGN_EXTEND_INREG(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::MUL:                return LowerMUL(Op, DAG);

  // For counter-based loop handling.
  case ISD::INTRINSIC_W_CHAIN:  return SDValue();

  // Frame & Return address.
  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
  }
}
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) const {
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::READCYCLECOUNTER: {
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));

    Results.push_back(RTB);
    Results.push_back(RTB.getValue(1));
    Results.push_back(RTB.getValue(2));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
        Intrinsic::ppc_is_decremented_ctr_nonzero)
      break;

    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");
    EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                 N->getValueType(0));
    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
                                 N->getOperand(1));

    Results.push_back(NewInt);
    Results.push_back(NewInt.getValue(1));
    break;
  }
  case ISD::VAARG: {
    if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
      return;

    EVT VT = N->getValueType(0);

    if (VT == MVT::i64) {
      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, Subtarget);

      Results.push_back(NewNode);
      Results.push_back(NewNode.getValue(1));
    }
    return;
  }
  case ISD::FP_ROUND_INREG: {
    assert(N->getValueType(0) == MVT::ppcf128);
    assert(N->getOperand(0).getValueType() == MVT::ppcf128);
    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                             MVT::f64, N->getOperand(0),
                             DAG.getIntPtrConstant(0, dl));
    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                             MVT::f64, N->getOperand(0),
                             DAG.getIntPtrConstant(1, dl));

    // Add the two halves of the long double in round-to-zero mode.
    SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);

    // We know the low half is about to be thrown away, so just use something
    // convenient.
    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
                                  FPreg, FPreg));
    return;
  }
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    // LowerFP_TO_INT() can only handle f32 and f64.
    if (N->getOperand(0).getValueType() == MVT::ppcf128)
      return;
    Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
    return;
  }
}
//===----------------------------------------------------------------------===//
//  Other Lowering Code
//===----------------------------------------------------------------------===//

static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Function *Func = Intrinsic::getDeclaration(M, Id);
  return Builder.CreateCall(Func, {});
}

// The mappings for emitLeading/TrailingFence are taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
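// In outline (summarizing the two functions below): sequentially consistent
// operations get a full sync as their leading fence, release (and stronger)
// operations get a leading lwsync, and acquiring loads get a trailing lwsync.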
Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                 AtomicOrdering Ord, bool IsStore,
                                                 bool IsLoad) const {
  if (Ord == SequentiallyConsistent)
    return callIntrinsic(Builder, Intrinsic::ppc_sync);
  if (isAtLeastRelease(Ord))
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  return nullptr;
}
Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                  AtomicOrdering Ord, bool IsStore,
                                                  bool IsLoad) const {
  if (IsLoad && isAtLeastAcquire(Ord))
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  // FIXME: this is too conservative, a dependent branch + isync is enough.
  // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
  // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
  // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
  return nullptr;
}
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
                                    unsigned AtomicSize,
                                    unsigned BinOpcode) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
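  // Sketched for reference (the real sequence is built below), the expansion
  // is the standard load-reserve/store-conditional retry loop, roughly:
  //
  //   do {
  //     old = load_reserve(ptr);              // l[bhwd]arx
  //     tmp = old <BinOpcode> incr;           // or tmp = incr for a swap
  //   } while (!store_conditional(ptr, tmp)); // st[bhwd]cx. + bne-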
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  auto LoadMnemonic = PPC::LDARX;
  auto StoreMnemonic = PPC::STDCX;
  switch (AtomicSize) {
  default:
    llvm_unreachable("Unexpected size of atomic entity");
  case 1:
    LoadMnemonic = PPC::LBARX;
    StoreMnemonic = PPC::STBCX;
    assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics");
    break;
  case 2:
    LoadMnemonic = PPC::LHARX;
    StoreMnemonic = PPC::STHCX;
    assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics");
    break;
  case 4:
    LoadMnemonic = PPC::LWARX;
    StoreMnemonic = PPC::STWCX;
    break;
  case 8:
    LoadMnemonic = PPC::LDARX;
    StoreMnemonic = PPC::STDCX;
    break;
  }
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = BB;
  ++It;

  unsigned dest = MI->getOperand(0).getReg();
  unsigned ptrA = MI->getOperand(1).getReg();
  unsigned ptrB = MI->getOperand(2).getReg();
  unsigned incr = MI->getOperand(3).getReg();
  DebugLoc dl = MI->getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  unsigned TmpReg = (!BinOpcode) ? incr :
    RegInfo.createVirtualRegister(AtomicSize == 8 ? &PPC::G8RCRegClass
                                                  : &PPC::GPRCRegClass);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   add r0, dest, incr
  //   st[wd]cx. r0, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  BB = loopMBB;
  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
    .addReg(ptrA).addReg(ptrB);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
  BuildMI(BB, dl, TII->get(StoreMnemonic))
    .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  return BB;
}
MachineBasicBlock *
PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
                                            MachineBasicBlock *BB,
                                            bool is8bit,    // operation
                                            unsigned BinOpcode) const {
  // If we support part-word atomic mnemonics, just use them.
  if (Subtarget.hasPartwordAtomics())
    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode);

  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  // In 64 bit mode we have to use 64 bits for addresses, even though the
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  bool is64bit = Subtarget.isPPC64();
  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
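  // Note: in the X-form memory accesses built below, an RA operand of r0 is
  // read as the literal value zero, which is why a dedicated "zero" register
  // is paired with the computed pointer for lwarx/stwcx.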
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = BB;
  ++It;

  unsigned dest = MI->getOperand(0).getReg();
  unsigned ptrA = MI->getOperand(1).getReg();
  unsigned ptrB = MI->getOperand(2).getReg();
  unsigned incr = MI->getOperand(3).getReg();
  DebugLoc dl = MI->getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
                                          : &PPC::GPRCRegClass;
  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
  unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
  unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
  unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
  unsigned MaskReg = RegInfo.createVirtualRegister(RC);
  unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
  unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
  unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
  unsigned Ptr1Reg;
  unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  //   add ptr1, ptrA, ptrB     [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]
  //   rlwinm ptr, ptr1, 0, 0, 29
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   add tmp, tmpDest, incr2
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw dest, tmpDest, shift
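  // Worked example of the shift computation (an illustrative note): for a
  // byte at byte-offset k within its aligned word, shift1 = k*8, but on a
  // big-endian core that byte actually sits (3-k)*8 bits up in the word, so
  // the xori with 24 [16 for halfwords] converts k*8 into the needed (3-k)*8.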
  if (ptrA != ZeroReg) {
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
      .addReg(ptrA).addReg(ptrB);
  } else {
    Ptr1Reg = ptrB;
  }
  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
      .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
      .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
  if (is64bit)
    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
      .addReg(Ptr1Reg).addImm(0).addImm(61);
  else
    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
      .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
      .addReg(incr).addReg(ShiftReg);
  if (is8bit)
    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
  else {
    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
    BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg).addReg(Mask3Reg).addImm(65535);
  }
  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
      .addReg(Mask2Reg).addReg(ShiftReg);

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
    .addReg(ZeroReg).addReg(PtrReg);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
      .addReg(Incr2Reg).addReg(TmpDestReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
    .addReg(TmpDestReg).addReg(MaskReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
    .addReg(TmpReg).addReg(MaskReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
    .addReg(Tmp3Reg).addReg(Tmp2Reg);
  BuildMI(BB, dl, TII->get(PPC::STWCX))
    .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
    .addReg(ShiftReg);
  return BB;
}
llvm::MachineBasicBlock*
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI->getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator I = MBB;
  ++I;

  // Memory Reference
  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();

  unsigned DstReg = MI->getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(RC->hasType(MVT::i32) && "Invalid destination!");
  unsigned mainDstReg = MRI.createVirtualRegister(RC);
  unsigned restoreDstReg = MRI.createVirtualRegister(RC);

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");
  // For v = setjmp(buf), we generate
  //
  // thisMBB:
  //  SjLjSetup mainMBB
  //  bl mainMBB
  //  v_restore = 1
  //  b sinkMBB
  //
  // mainMBB:
  //  buf[LabelOffset] = LR
  //  v_main = 0
  //
  // sinkMBB:
  //  v = phi(main, restore)
  //

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, sinkMBB);

  MachineInstrBuilder MIB;

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Note that the structure of the jmp_buf used here is not compatible
  // with that used by libc, and is not designed to be. Specifically, it
  // stores only those 'reserved' registers that LLVM does not otherwise
  // understand how to spill. Also, by convention, by the time this
  // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and stack address in the third. Following the
  // X86 target code, we'll store the jump address in the second slot. We also
  // need to save the TOC pointer (R2) to handle jumps between shared
  // libraries, and that will be stored in the fourth slot. The thread
  // identifier (R13) is not affected.
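  // For reference, the resulting buffer layout in pointer-sized slots is:
  //   buf[0] = frame address (stored by the front end)
  //   buf[1] = jump (return) address
  //   buf[2] = stack address (stored by the front end)
  //   buf[3] = TOC pointer (r2)
  //   buf[4] = base pointer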
  // thisMBB:
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  // Prepare the IP in a virtual register.
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
  unsigned BufReg = MI->getOperand(1).getReg();

  if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
            .addReg(PPC::X2)
            .addImm(TOCOffset)
            .addReg(BufReg);
    MIB.setMemRefs(MMOBegin, MMOEnd);
  }

  // Naked functions never have a base pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned BaseReg;
  if (MF->getFunction()->hasFnAttribute(Attribute::Naked))
    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  else
    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;

  MIB = BuildMI(*thisMBB, MI, DL,
                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
            .addReg(BaseReg)
            .addImm(BPOffset)
            .addReg(BufReg);
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Setup
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
  MIB.addRegMask(TRI->getNoPreservedMask());

  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);

  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
          .addMBB(mainMBB);
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);

  thisMBB->addSuccessor(mainMBB, /* weight */ 0);
  thisMBB->addSuccessor(sinkMBB, /* weight */ 1);

  // mainMBB:
  //  mainDstReg = 0
  MIB =
      BuildMI(mainMBB, DL,
              TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);

  // Store IP
  if (Subtarget.isPPC64()) {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }

  MIB.setMemRefs(MMOBegin, MMOEnd);

  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
  mainMBB->addSuccessor(sinkMBB);

  // sinkMBB:
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
          TII->get(PPC::PHI), DstReg)
    .addReg(mainDstReg).addMBB(mainMBB)
    .addReg(restoreDstReg).addMBB(thisMBB);

  MI->eraseFromParent();
  return sinkMBB;
}
MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
                                     MachineBasicBlock *MBB) const {
  DebugLoc DL = MI->getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  // Memory Reference
  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");

  const TargetRegisterClass *RC =
    (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  unsigned Tmp = MRI.createVirtualRegister(RC);
  // Since FP is only updated here but NOT referenced, it's treated as GPR.
  unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
  unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
  unsigned BP  = (PVT == MVT::i64) ? PPC::X30
                                   : (Subtarget.isSVR4ABI() &&
                                      MF->getTarget().getRelocationModel() == Reloc::PIC_
                                          ? PPC::R29 : PPC::R30);

  MachineInstrBuilder MIB;

  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t SPOffset    = 2 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  unsigned BufReg = MI->getOperand(0).getReg();

  // Reload FP (the jumped-to function may not have had a
  // frame pointer, and if so, then its r31 will be restored
  // as necessary).
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
            .addImm(0)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
            .addImm(0)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload IP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
            .addImm(LabelOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload SP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
            .addImm(SPOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
            .addImm(SPOffset)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload BP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
            .addImm(BPOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
            .addImm(BPOffset)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload TOC
  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
            .addImm(TOCOffset)
            .addReg(BufReg);
    MIB.setMemRefs(MMOBegin, MMOEnd);
  }

  // Jump
  BuildMI(*MBB, MI, DL,
          TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));

  MI->eraseFromParent();
  return MBB;
}
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                               MachineBasicBlock *BB) const {
  if (MI->getOpcode() == TargetOpcode::STACKMAP ||
      MI->getOpcode() == TargetOpcode::PATCHPOINT) {
    if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
        MI->getOpcode() == TargetOpcode::PATCHPOINT) {
      // Call lowering should have added an r2 operand to indicate a dependence
      // on the TOC base pointer value. It can't, however, because there is no
      // way to mark the dependence as implicit there, and so the stackmap code
      // will confuse it with a regular operand. Instead, add the dependence
      // here.
      setUsesTOCBasePtr(*BB->getParent());
      MI->addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
    }

    return emitPatchPoint(MI, BB);
  }

  if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
      MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
    return emitEHSjLjSetJmp(MI, BB);
  } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
             MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
    return emitEHSjLjLongJmp(MI, BB);
  }

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  // To "insert" these instructions we actually have to insert their
  // control-flow patterns.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = BB;
  ++It;

  MachineFunction *F = BB->getParent();

  if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
                              MI->getOpcode() == PPC::SELECT_CC_I8 ||
                              MI->getOpcode() == PPC::SELECT_I4 ||
                              MI->getOpcode() == PPC::SELECT_I8)) {
    SmallVector<MachineOperand, 2> Cond;
    if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
        MI->getOpcode() == PPC::SELECT_CC_I8)
      Cond.push_back(MI->getOperand(4));
    else
      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
    Cond.push_back(MI->getOperand(1));

    DebugLoc dl = MI->getDebugLoc();
    TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
                      Cond, MI->getOperand(2).getReg(),
                      MI->getOperand(3).getReg());
  } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
             MI->getOpcode() == PPC::SELECT_CC_I8 ||
             MI->getOpcode() == PPC::SELECT_CC_F4 ||
             MI->getOpcode() == PPC::SELECT_CC_F8 ||
             MI->getOpcode() == PPC::SELECT_CC_QFRC ||
             MI->getOpcode() == PPC::SELECT_CC_QSRC ||
             MI->getOpcode() == PPC::SELECT_CC_QBRC ||
             MI->getOpcode() == PPC::SELECT_CC_VRRC ||
             MI->getOpcode() == PPC::SELECT_CC_VSFRC ||
             MI->getOpcode() == PPC::SELECT_CC_VSSRC ||
             MI->getOpcode() == PPC::SELECT_CC_VSRC ||
             MI->getOpcode() == PPC::SELECT_I4 ||
             MI->getOpcode() == PPC::SELECT_I8 ||
             MI->getOpcode() == PPC::SELECT_F4 ||
             MI->getOpcode() == PPC::SELECT_F8 ||
             MI->getOpcode() == PPC::SELECT_QFRC ||
             MI->getOpcode() == PPC::SELECT_QSRC ||
             MI->getOpcode() == PPC::SELECT_QBRC ||
             MI->getOpcode() == PPC::SELECT_VRRC ||
             MI->getOpcode() == PPC::SELECT_VSFRC ||
             MI->getOpcode() == PPC::SELECT_VSSRC ||
             MI->getOpcode() == PPC::SELECT_VSRC) {
    // The incoming instruction knows the destination vreg to set, the
    // condition code register to branch on, the true/false values to
    // select between, and a branch opcode to use.

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC copy1MBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    DebugLoc dl = MI->getDebugLoc();
    F->insert(It, copy0MBB);
    F->insert(It, sinkMBB);

    // Transfer the remainder of BB and its successor edges to sinkMBB.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

    // Next, add the true and fallthrough blocks as its successors.
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    if (MI->getOpcode() == PPC::SELECT_I4 ||
        MI->getOpcode() == PPC::SELECT_I8 ||
        MI->getOpcode() == PPC::SELECT_F4 ||
        MI->getOpcode() == PPC::SELECT_F8 ||
        MI->getOpcode() == PPC::SELECT_QFRC ||
        MI->getOpcode() == PPC::SELECT_QSRC ||
        MI->getOpcode() == PPC::SELECT_QBRC ||
        MI->getOpcode() == PPC::SELECT_VRRC ||
        MI->getOpcode() == PPC::SELECT_VSFRC ||
        MI->getOpcode() == PPC::SELECT_VSSRC ||
        MI->getOpcode() == PPC::SELECT_VSRC) {
      BuildMI(BB, dl, TII->get(PPC::BC))
        .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
    } else {
      unsigned SelectPred = MI->getOperand(4).getImm();
      BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
    }

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(*BB, BB->begin(), dl,
            TII->get(PPC::PHI), MI->getOperand(0).getReg())
      .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
  } else if (MI->getOpcode() == PPC::ReadTB) {
    // To read the 64-bit time-base register on a 32-bit target, we read the
    // two halves. Should the counter have wrapped while it was being read, we
    // need to try again.
    // ...
    // readLoop:
    // mfspr Rx,TBU # load from TBU
    // mfspr Ry,TB  # load from TB
    // mfspr Rz,TBU # load from TBU
    // cmpw crX,Rx,Rz # check if 'old' = 'new'
    // bne readLoop # branch if they're not equal
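    // (SPR 269 is TBU, the upper half of the time base, and SPR 268 is
    // TB/TBL, the lower half -- matching the mfspr immediates used below.)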
    // ...

    MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    DebugLoc dl = MI->getDebugLoc();
    F->insert(It, readMBB);
    F->insert(It, sinkMBB);

    // Transfer the remainder of BB and its successor edges to sinkMBB.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

    BB->addSuccessor(readMBB);
    BB = readMBB;

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
    unsigned LoReg = MI->getOperand(0).getReg();
    unsigned HiReg = MI->getOperand(1).getReg();

    BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
    BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
    BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);

    unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);

    BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
      .addReg(HiReg).addReg(ReadAgainReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB);

    BB->addSuccessor(readMBB);
    BB->addSuccessor(sinkMBB);
  }
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);

  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
           MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
           (Subtarget.hasPartwordAtomics() &&
            MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
           (Subtarget.hasPartwordAtomics() &&
            MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
    bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;

    auto LoadMnemonic = PPC::LDARX;
    auto StoreMnemonic = PPC::STDCX;
    switch (MI->getOpcode()) {
    default:
      llvm_unreachable("Compare and swap of unknown size");
    case PPC::ATOMIC_CMP_SWAP_I8:
      LoadMnemonic = PPC::LBARX;
      StoreMnemonic = PPC::STBCX;
      assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics");
      break;
    case PPC::ATOMIC_CMP_SWAP_I16:
      LoadMnemonic = PPC::LHARX;
      StoreMnemonic = PPC::STHCX;
      assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics");
      break;
    case PPC::ATOMIC_CMP_SWAP_I32:
      LoadMnemonic = PPC::LWARX;
      StoreMnemonic = PPC::STWCX;
      break;
    case PPC::ATOMIC_CMP_SWAP_I64:
      LoadMnemonic = PPC::LDARX;
      StoreMnemonic = PPC::STDCX;
      break;
    }
    unsigned dest   = MI->getOperand(0).getReg();
    unsigned ptrA   = MI->getOperand(1).getReg();
    unsigned ptrB   = MI->getOperand(2).getReg();
    unsigned oldval = MI->getOperand(3).getReg();
    unsigned newval = MI->getOperand(4).getReg();
    DebugLoc dl     = MI->getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, midMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    //  thisMBB:
    //   ...
    //   fallthrough --> loopMBB
    BB->addSuccessor(loop1MBB);

    // loop1MBB:
    //   l[bhwd]arx dest, ptr
    //   cmp[wd] dest, oldval
    //   bne- midMBB
    // loop2MBB:
    //   st[bhwd]cx. newval, ptr
    //   bne- loopMBB
    //   b exitBB
    // midMBB:
    //   st[bhwd]cx. dest, ptr
    // exitBB:
    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
      .addReg(ptrA).addReg(ptrB);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
      .addReg(oldval).addReg(dest);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(midMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(StoreMnemonic))
      .addReg(newval).addReg(ptrA).addReg(ptrB);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    BB = midMBB;
    BuildMI(BB, dl, TII->get(StoreMnemonic))
      .addReg(dest).addReg(ptrA).addReg(ptrB);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    //   ...
    BB = exitMBB;
  } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
             MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
    // We must use 64-bit registers for addresses when targeting 64-bit,
    // since we're actually doing arithmetic on them.  Other registers
    // can be 32-bit.
    bool is64bit = Subtarget.isPPC64();
    bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;

    unsigned dest   = MI->getOperand(0).getReg();
    unsigned ptrA   = MI->getOperand(1).getReg();
    unsigned ptrB   = MI->getOperand(2).getReg();
    unsigned oldval = MI->getOperand(3).getReg();
    unsigned newval = MI->getOperand(4).getReg();
    DebugLoc dl     = MI->getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, midMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass
                                            : &PPC::GPRCRegClass;
    unsigned PtrReg = RegInfo.createVirtualRegister(RC);
    unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
    unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
    unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
    unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
    unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
    unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
    unsigned MaskReg = RegInfo.createVirtualRegister(RC);
    unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
    unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
    unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
    unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
    unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
    unsigned Ptr1Reg;
    unsigned TmpReg = RegInfo.createVirtualRegister(RC);
    unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
    //  thisMBB:
    //   ...
    //   fallthrough --> loopMBB
    BB->addSuccessor(loop1MBB);

    // The 4-byte load must be aligned, while a char or short may be
    // anywhere in the word.  Hence all this nasty bookkeeping code.
    //   add ptr1, ptrA, ptrB     [copy if ptrA==0]
    //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
    //   xori shift, shift1, 24 [16]
    //   rlwinm ptr, ptr1, 0, 0, 29
    //   slw newval2, newval, shift
    //   slw oldval2, oldval, shift
    //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
    //   slw mask, mask2, shift
    //   and newval3, newval2, mask
    //   and oldval3, oldval2, mask
    // loop1MBB:
    //   lwarx tmpDest, ptr
    //   and tmp, tmpDest, mask
    //   cmpw tmp, oldval3
    //   bne- midMBB
    // loop2MBB:
    //   andc tmp2, tmpDest, mask
    //   or tmp4, tmp2, newval3
    //   stwcx. tmp4, ptr
    //   bne- loop1MBB
    //   b exitBB
    // midMBB:
    //   stwcx. tmpDest, ptr
    // exitBB:
    //   srw dest, tmpDest, shift
    if (ptrA != ZeroReg) {
      Ptr1Reg = RegInfo.createVirtualRegister(RC);
      BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
        .addReg(ptrA).addReg(ptrB);
    } else {
      Ptr1Reg = ptrB;
    }
    BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
        .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
        .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
    if (is64bit)
      BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
        .addReg(Ptr1Reg).addImm(0).addImm(61);
    else
      BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
        .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
    BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
        .addReg(newval).addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
        .addReg(oldval).addReg(ShiftReg);
    if (is8bit)
      BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
    else {
      BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
      BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
        .addReg(Mask3Reg).addImm(65535);
    }
    BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
        .addReg(Mask2Reg).addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
        .addReg(NewVal2Reg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
        .addReg(OldVal2Reg).addReg(MaskReg);

    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
        .addReg(ZeroReg).addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
        .addReg(TmpDestReg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
        .addReg(TmpReg).addReg(OldVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(midMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
        .addReg(TmpDestReg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
        .addReg(Tmp2Reg).addReg(NewVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
        .addReg(ZeroReg).addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    BB = midMBB;
    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
        .addReg(ZeroReg).addReg(PtrReg);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    //   ...
    BB = exitMBB;
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpReg)
      .addReg(ShiftReg);
  } else if (MI->getOpcode() == PPC::FADDrtz) {
    // This pseudo performs an FADD with rounding mode temporarily forced
    // to round-to-zero. We emit this via custom inserter since the FPSCR
    // is not modeled at the SelectionDAG level.
    unsigned Dest = MI->getOperand(0).getReg();
    unsigned Src1 = MI->getOperand(1).getReg();
    unsigned Src2 = MI->getOperand(2).getReg();
    DebugLoc dl = MI->getDebugLoc();

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);

    // Save FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);

    // Set rounding mode to round-to-zero.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
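    // (The two mtfsb instructions set FPSCR bit 31 and clear bit 30; these
    // are the RN rounding-mode bits, and RN = 0b01 selects round toward zero.)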

    // Perform addition.
    BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);

    // Restore FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
  } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
             MI->getOpcode() == PPC::ANDIo_1_GT_BIT ||
             MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
             MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) {
    unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
                       MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ?
                      PPC::ANDIo8 : PPC::ANDIo;
    bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
                 MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8);

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
                                                  &PPC::GPRCRegClass :
                                                  &PPC::G8RCRegClass);

    DebugLoc dl = MI->getDebugLoc();
    BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
      .addReg(MI->getOperand(1).getReg()).addImm(1);
    BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
            MI->getOperand(0).getReg())
      .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
  } else if (MI->getOpcode() == PPC::TCHECK_RET) {
    DebugLoc Dl = MI->getDebugLoc();
    MachineRegisterInfo &RegInfo = F->getRegInfo();
    unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
    BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
    return BB;
  } else {
    llvm_unreachable("Unexpected instr type to insert");
  }

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return BB;
}
//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

static std::string getRecipOp(const char *Base, EVT VT) {
  std::string RecipOp(Base);
  if (VT.getScalarType() == MVT::f64)
    RecipOp += "d";
  else
    RecipOp += "f";

  if (VT.isVector())
    RecipOp = "vec-" + RecipOp;

  return RecipOp;
}
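// For example (illustrative): getRecipOp("sqrt", MVT::f32) yields "sqrtf",
// and getRecipOp("div", MVT::v2f64) yields "vec-divd"; these keys are then
// looked up in the TargetOptions::Reciprocals table used below.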
SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
                                            DAGCombinerInfo &DCI,
                                            unsigned &RefinementSteps,
                                            bool &UseOneConstNR) const {
  EVT VT = Operand.getValueType();
  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
      (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
      (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
      (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
      (VT == MVT::v4f64 && Subtarget.hasQPX())) {
    TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
    std::string RecipOp = getRecipOp("sqrt", VT);
    if (!Recips.isEnabled(RecipOp))
      return SDValue();

    RefinementSteps = Recips.getRefinementSteps(RecipOp);
    UseOneConstNR = true;
    return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
  }
  return SDValue();
}
SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
                                            DAGCombinerInfo &DCI,
                                            unsigned &RefinementSteps) const {
  EVT VT = Operand.getValueType();
  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
      (VT == MVT::f64 && Subtarget.hasFRE()) ||
      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
      (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
      (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
      (VT == MVT::v4f64 && Subtarget.hasQPX())) {
    TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
    std::string RecipOp = getRecipOp("div", VT);
    if (!Recips.isEnabled(RecipOp))
      return SDValue();

    RefinementSteps = Recips.getRefinementSteps(RecipOp);
    return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
  }
  return SDValue();
}
bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
  // Note: This functionality is used only when unsafe-fp-math is enabled, and
  // on cores with reciprocal estimates (which are used when unsafe-fp-math is
  // enabled for division), this functionality is redundant with the default
  // combiner logic (once the division -> reciprocal/multiply transformation
  // has taken place). As a result, this matters more for older cores than for
  // newer ones.

  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
  // reciprocal if there are two or more FDIVs (for embedded cores with only
  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
  switch (Subtarget.getDarwinDirective()) {
  default:
    return NumUsers > 2;
  case PPC::DIR_440:
  case PPC::DIR_A2:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
    return NumUsers > 1;
  }
}
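// For example: with two fdivs sharing one divisor, the combine fires only on
// the single-FP-pipeline cores listed above; elsewhere a third fdiv is needed
// before the reciprocal transformation is considered profitable.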
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
                               unsigned Bytes, int Dist,
                               SelectionDAG &DAG) {
  if (VT.getSizeInBits() / 8 != Bytes)
    return false;

  SDValue BaseLoc = Base->getBasePtr();
  if (Loc.getOpcode() == ISD::FrameIndex) {
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS  = MFI->getObjectSize(FI);
    int BFS = MFI->getObjectSize(BFI);
    if (FS != BFS || FS != (int)Bytes) return false;
    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
  }

  // Handle X+C.
  if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
      cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
    return true;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const GlobalValue *GV1 = nullptr;
  const GlobalValue *GV2 = nullptr;
  int64_t Offset1 = 0;
  int64_t Offset2 = 0;
  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
  if (isGA1 && isGA2 && GV1 == GV2)
    return Offset1 == (Offset2 + Dist*Bytes);

  return false;
}
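// Illustrative note: with Bytes == 16 and Dist == 1, this returns true
// exactly when Loc addresses the 16-byte slot immediately after BaseLoc
// (and Dist == -1 would check the slot immediately before it).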
// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
// not enforce equality of the chain operands.
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
                            unsigned Bytes, int Dist,
                            SelectionDAG &DAG) {
  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
    EVT VT = LS->getMemoryVT();
    SDValue Loc = LS->getBasePtr();
    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
  }

  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default: return false;
    case Intrinsic::ppc_qpx_qvlfd:
    case Intrinsic::ppc_qpx_qvlfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfs:
    case Intrinsic::ppc_qpx_qvlfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcd:
    case Intrinsic::ppc_qpx_qvlfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcs:
    case Intrinsic::ppc_qpx_qvlfcsa:
      VT = MVT::v2f32;
      break;
    case Intrinsic::ppc_qpx_qvlfiwa:
    case Intrinsic::ppc_qpx_qvlfiwz:
    case Intrinsic::ppc_altivec_lvx:
    case Intrinsic::ppc_altivec_lvxl:
    case Intrinsic::ppc_vsx_lxvw4x:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
  }

  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default: return false;
    case Intrinsic::ppc_qpx_qvstfd:
    case Intrinsic::ppc_qpx_qvstfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfs:
    case Intrinsic::ppc_qpx_qvstfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcd:
    case Intrinsic::ppc_qpx_qvstfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcs:
    case Intrinsic::ppc_qpx_qvstfcsa:
      VT = MVT::v2f32;
      break;
    case Intrinsic::ppc_qpx_qvstfiw:
    case Intrinsic::ppc_qpx_qvstfiwa:
    case Intrinsic::ppc_altivec_stvx:
    case Intrinsic::ppc_altivec_stvxl:
    case Intrinsic::ppc_vsx_stxvw4x:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
  }

  return false;
}
// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true result
// indicates that it is safe to create a new consecutive load adjacent to the
// load provided.
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
  SDValue Chain = LD->getChain();
  EVT VT = LD->getMemoryVT();

  SmallSet<SDNode *, 16> LoadRoots;
  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
  SmallSet<SDNode *, 16> Visited;

  // First, search up the chain, branching to follow all token-factor operands.
  // If we find a consecutive load, then we're done, otherwise, record all
  // nodes just above the top-level loads and token factors.
  while (!Queue.empty()) {
    SDNode *ChainNext = Queue.pop_back_val();
    if (!Visited.insert(ChainNext).second)
      continue;

    if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
        return true;

      if (!Visited.count(ChainLD->getChain().getNode()))
        Queue.push_back(ChainLD->getChain().getNode());
    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
      for (const SDUse &O : ChainNext->ops())
        if (!Visited.count(O.getNode()))
          Queue.push_back(O.getNode());
    } else
      LoadRoots.insert(ChainNext);
  }

  // Second, search down the chain, starting from the top-level nodes recorded
  // in the first phase. These top-level nodes are the nodes just above all
  // loads and token factors. Starting with their uses, recursively look through
  // all loads (just the chain uses) and token factors to find a consecutive
  // load.
  Visited.clear();
  Queue.clear();

  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
       IE = LoadRoots.end(); I != IE; ++I) {
    Queue.push_back(*I);

    while (!Queue.empty()) {
      SDNode *LoadRoot = Queue.pop_back_val();
      if (!Visited.insert(LoadRoot).second)
        continue;

      if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
        if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
          return true;

      for (SDNode::use_iterator UI = LoadRoot->use_begin(),
           UE = LoadRoot->use_end(); UI != UE; ++UI)
        if (((isa<MemSDNode>(*UI) &&
              cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
             UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
          Queue.push_back(*UI);
    }
  }

  return false;
}
SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
  // If we're tracking CR bits, we need to be careful that we don't have:
  //   trunc(binary-ops(zext(x), zext(y)))
  // or
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
  // such that we're unnecessarily moving things into GPRs when it would be
  // better to keep them in CR bits.

  // Note that trunc here can be an actual i1 trunc, or can be the effective
  // truncation that comes from a setcc or select_cc.
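  // For example (illustrative, at the DAG level): in
  //   trunc(or(zext(a), zext(b)))  with a and b of type i1,
  // the zexts and the trunc cancel once the 'or' is performed directly on
  // the i1 values, which is the rewrite performed below.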
  if (N->getOpcode() == ISD::TRUNCATE &&
      N->getValueType(0) != MVT::i1)
    return SDValue();

  if (N->getOperand(0).getValueType() != MVT::i32 &&
      N->getOperand(0).getValueType() != MVT::i64)
    return SDValue();

  if (N->getOpcode() == ISD::SETCC ||
      N->getOpcode() == ISD::SELECT_CC) {
    // If we're looking at a comparison, then we need to make sure that the
    // high bits (all except for the first) don't affect the result.
    ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(
        N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();

    if (ISD::isSignedIntSetCC(CC)) {
      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
        return SDValue();
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      if (!DAG.MaskedValueIsZero(N->getOperand(0),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
          !DAG.MaskedValueIsZero(N->getOperand(1),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
        return SDValue();
    } else {
      // This is neither a signed nor an unsigned comparison, just make sure
      // that the high bits are equal.
      APInt Op1Zero, Op1One;
      APInt Op2Zero, Op2One;
      DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One);
      DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One);

      // We don't really care about what is known about the first bit (if
      // anything), so clear it in all masks prior to comparing them.
      Op1Zero.clearBit(0); Op1One.clearBit(0);
      Op2Zero.clearBit(0); Op2One.clearBit(0);

      if (Op1Zero != Op2Zero || Op1One != Op2One)
        return SDValue();
    }
  }
  // We now know that the higher-order bits are irrelevant, we just need to
  // make sure that all of the intermediate operations are bit operations, and
  // all inputs are extensions.
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR  &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
      N->getOperand(1).getOpcode() != ISD::AND &&
      N->getOperand(1).getOpcode() != ISD::OR  &&
      N->getOperand(1).getOpcode() != ISD::XOR &&
      N->getOperand(1).getOpcode() != ISD::SELECT &&
      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps, PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  for (unsigned i = 0; i < 2; ++i) {
    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
         N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
        isa<ConstantSDNode>(N->getOperand(i)))
      Inputs.push_back(N->getOperand(i));
    else
      BinOps.push_back(N->getOperand(i));

    if (N->getOpcode() == ISD::TRUNCATE)
      break;
  }
  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by extensions.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.back();
    BinOps.pop_back();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
           BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not an extension or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }
  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
                              UE = Inputs[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) of SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i] ||
            User->getOperand(1) == Inputs[i])
          return SDValue();
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
                              UE = PromOps[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) of SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i] ||
            User->getOperand(1) == PromOps[i])
          return SDValue();
      }
    }
  }
  // Replace all inputs with the extension operand.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constants may have users outside the cluster of to-be-promoted nodes,
    // and so we need to replace those as we do the promotions.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
  }
  // Replace all operations (these are all the same, but have a different
  // (i1) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first. Any intermediate truncations or
  // extensions disappear.
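  // For example (illustrative, not from the original source): once both
  // operands of an AND have been rewritten to be i1 values, the AND itself
  // is rebuilt below with an i1 result type, and any trunc/zext/sext/aext
  // nodes that merely moved values between i1 and i32/i64 are bypassed
  // entirely.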
  while (!PromOps.empty()) {
    SDValue PromOp = PromOps.back();
    PromOps.pop_back();

    if (PromOp.getOpcode() == ISD::TRUNCATE ||
        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
        PromOp.getOpcode() == ISD::ANY_EXTEND) {
      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
          PromOp.getOperand(0).getValueType() != MVT::i1) {
        // The operand is not yet ready (see comment below).
        PromOps.insert(PromOps.begin(), PromOp);
        continue;
      }

      SDValue RepValue = PromOp.getOperand(0);
      if (isa<ConstantSDNode>(RepValue))
        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);

      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
      continue;
    }

    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != MVT::i1) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOps.insert(PromOps.begin(), PromOp);
      continue;
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If there are any constant inputs, make sure they're replaced now.
    for (unsigned i = 0; i < 2; ++i)
      if (isa<ConstantSDNode>(Ops[C+i]))
        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
  }

  // Now we're left with the initial truncation itself.
  if (N->getOpcode() == ISD::TRUNCATE)
    return N->getOperand(0);

  // Otherwise, this is a comparison. The operands to be compared have just
  // changed type (to i1), but everything else is the same.
  return SDValue(N, 0);
}
SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  // If we're tracking CR bits, we need to be careful that we don't have:
  //   zext(binary-ops(trunc(x), trunc(y)))
  // or
  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
  // such that we're unnecessarily moving things into CR bits that can more
  // efficiently stay in GPRs. Note that if we're not certain that the high
  // bits are set as required by the final extension, we still may need to do
  // some masking to get the proper behavior.
  //
  // This same functionality is important on PPC64 when dealing with
  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
  // the return values of functions. Because it is so similar, it is handled
  // here as well.
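  //
  // A minimal sketch of the effect (illustrative only): on PPC64,
  //   zext(and(trunc(i64 %x), trunc(i64 %y)))
  // can be rewritten to perform the AND directly on the i64 values, with the
  // final mask or shift pair below applied only when ReallyNeedsExt indicates
  // the high bits are not already correct.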
  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return SDValue();

  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
        (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
    return SDValue();

  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
  SmallPtrSet<SDNode *, 16> Visited;
  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by truncations.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.back();
    BinOps.pop_back();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not a truncation or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // The operands of a select that must be truncated when the select is
  // promoted because the operand is actually part of the to-be-promoted set.
  DenseMap<SDNode *, EVT> SelectTruncOp[2];
  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
         UE = Inputs[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) of SELECT
      // or SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == Inputs[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }
  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
         UE = PromOps[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) of SELECT
      // or SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == PromOps[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }
  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
  bool ReallyNeedsExt = false;
  if (N->getOpcode() != ISD::ANY_EXTEND) {
    // If all of the inputs are not already sign/zero extended, then
    // we'll still need to do that at the end.
    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
      if (isa<ConstantSDNode>(Inputs[i]))
        continue;

      unsigned OpBits =
        Inputs[i].getOperand(0).getValueSizeInBits();
      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");

      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
           !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
                                  APInt::getHighBitsSet(OpBits,
                                                        OpBits-PromBits))) ||
          (N->getOpcode() == ISD::SIGN_EXTEND &&
           DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
             (OpBits-(PromBits-1)))) {
        ReallyNeedsExt = true;
        break;
      }
    }
  }
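  // For instance (illustrative only): when zero-extending an i32 operand to
  // i64, if every non-constant input's pre-truncation value already has its
  // upper 32 bits known zero (per MaskedValueIsZero), ReallyNeedsExt stays
  // false and the final AND mask below is skipped.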
  // Replace all inputs, either with the truncation operand, or a
  // truncation or extension to the final output type.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constant inputs need to be replaced with the to-be-promoted nodes that
    // use them because they might have users outside of the cluster of
    // promoted nodes.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    SDValue InSrc = Inputs[i].getOperand(0);
    if (Inputs[i].getValueType() == N->getValueType(0))
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
    else if (N->getOpcode() == ISD::SIGN_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else if (N->getOpcode() == ISD::ZERO_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
  }
  // Replace all operations (these are all the same, but have a different
  // (promoted) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first.
  while (!PromOps.empty()) {
    SDValue PromOp = PromOps.back();
    PromOps.pop_back();

    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOps.insert(PromOps.begin(), PromOp);
      continue;
    }

    // For SELECT and SELECT_CC nodes, we do a similar check for any
    // to-be-promoted comparison inputs.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      if ((SelectTruncOp[0].count(PromOp.getNode()) &&
           PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
          (SelectTruncOp[1].count(PromOp.getNode()) &&
           PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
        PromOps.insert(PromOps.begin(), PromOp);
        continue;
      }
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If this node has constant inputs, then they'll need to be promoted here.
    for (unsigned i = 0; i < 2; ++i) {
      if (!isa<ConstantSDNode>(Ops[C+i]))
        continue;
      if (Ops[C+i].getValueType() == N->getValueType(0))
        continue;

      if (N->getOpcode() == ISD::SIGN_EXTEND)
        Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else if (N->getOpcode() == ISD::ZERO_EXTEND)
        Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else
        Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
    }

    // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
    // truncate them again to the original value type.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
      if (SI0 != SelectTruncOp[0].end())
        Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
      auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
      if (SI1 != SelectTruncOp[1].end())
        Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
    }

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
  }
  // Now we're left with the initial extension itself.
  if (!ReallyNeedsExt)
    return N->getOperand(0);

  // To zero extend, just mask off everything except for the first bit (in the
  // i1 case).
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
                       DAG.getConstant(APInt::getLowBitsSet(
                                         N->getValueSizeInBits(0), PromBits),
                                       dl, N->getValueType(0)));
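  // E.g. (illustrative only): for an i1 value being zero-extended to i32,
  // the mask is APInt::getLowBitsSet(32, 1) == 1, so this emits (and x, 1).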
  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
         "Invalid extension type");
  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
  SDValue ShiftCst =
    DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
  return DAG.getNode(ISD::SRA, dl, N->getValueType(0),
                     DAG.getNode(ISD::SHL, dl, N->getValueType(0),
                                 N->getOperand(0), ShiftCst), ShiftCst);
}
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  if (!Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // Don't handle ppc_fp128 here or i1 conversions.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();
  if (Op.getOperand(0).getValueType() == MVT::i1)
    return SDValue();

  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (Op.getOperand(0).getValueType() == MVT::i32)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  // If we're converting from a float, to an int, and back to a float again,
  // then we don't need the store/load pair at all.
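  // E.g. (illustrative only): (f64 (sint_to_fp (fp_to_sint f64:x))) becomes
  // fctidz followed by fcfid, so the value stays in floating-point registers
  // the whole time.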
  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
       Subtarget.hasFPCVT()) ||
      (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
    SDValue Src = Op.getOperand(0).getOperand(0);
    if (Src.getValueType() == MVT::f32) {
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
      DCI.AddToWorklist(Src.getNode());
    }

    unsigned FCTOp =
      Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ;

    SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);

    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
      DCI.AddToWorklist(FP.getNode());
    }

    return FP;
  }

  return SDValue();
}
// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
// builtins) into loads with swaps.
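// For example (illustrative only), a v2f64 load on a little endian subtarget
// becomes a PPCISD::LXVD2X node followed by a PPCISD::XXSWAPD node that
// restores the expected element order.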
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX load");
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    MMO = LD->getMemOperand();
    // If the MMO suggests this isn't a load of a full vector, leave
    // things alone. For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (MMO->getSize() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    // Similarly to the store case below, Intrin->getBasePtr() doesn't get
    // us what we want. Get operand 2 instead.
    Base = Intrin->getOperand(2);
    MMO = Intrin->getMemOperand();
    break;
  }
  }

  MVT VecTy = N->getValueType(0).getSimpleVT();
  SDValue LoadOps[] = { Chain, Base };
  SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
                                         DAG.getVTList(VecTy, MVT::Other),
                                         LoadOps, VecTy, MMO);
  DCI.AddToWorklist(Load.getNode());
  Chain = Load.getValue(1);
  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
                             DAG.getVTList(VecTy, MVT::Other), Chain, Load);
  DCI.AddToWorklist(Swap.getNode());
  return Swap;
}
// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
// builtins) into stores with swaps.
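// The mirror image of the load case above (illustrative only): the source
// vector is run through PPCISD::XXSWAPD first and then stored with a
// PPCISD::STXVD2X node.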
SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  unsigned SrcOpnd;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX store");
  case ISD::STORE: {
    StoreSDNode *ST = cast<StoreSDNode>(N);
    Chain = ST->getChain();
    Base = ST->getBasePtr();
    MMO = ST->getMemOperand();
    SrcOpnd = 1;

    // If the MMO suggests this isn't a store of a full vector, leave
    // things alone. For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (MMO->getSize() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_VOID: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    // Intrin->getBasePtr() oddly does not get what we want.
    Base = Intrin->getOperand(3);
    MMO = Intrin->getMemOperand();
    SrcOpnd = 2;
    break;
  }
  }

  SDValue Src = N->getOperand(SrcOpnd);
  MVT VecTy = Src.getValueType().getSimpleVT();
  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
                             DAG.getVTList(VecTy, MVT::Other), Chain, Src);
  DCI.AddToWorklist(Swap.getNode());
  Chain = Swap.getValue(1);
  SDValue StoreOps[] = { Chain, Swap, Base };
  SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
                                          DAG.getVTList(MVT::Other),
                                          StoreOps, VecTy, MMO);
  DCI.AddToWorklist(Store.getNode());
  return Store;
}
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default: break;
  case ISD::SHL:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue())   // 0 << V -> 0.
        return N->getOperand(0);
    }
    break;
  case ISD::SRL:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue())   // 0 >>u V -> 0.
        return N->getOperand(0);
    }
    break;
  case ISD::SRA:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue() ||   //  0 >>s V -> 0.
          C->isAllOnesValue())  // -1 >>s V -> -1.
        return N->getOperand(0);
    }
    break;
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
    return DAGCombineExtBoolTrunc(N, DCI);
  case ISD::TRUNCATE:
  case ISD::SETCC:
  case ISD::SELECT_CC:
    return DAGCombineTruncBoolExt(N, DCI);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    return combineFPToIntToFP(N, DCI);
  case ISD::STORE: {
    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
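    // E.g. (illustrative only): storing (fp_to_sint f64:x) as an i32 becomes
    // fctiwz followed by stfiwx, so the converted value never has to round
    // trip through a GPR.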
    if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
        N->getOperand(1).getValueType() == MVT::i32 &&
        N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
      SDValue Val = N->getOperand(1).getOperand(0);
      if (Val.getValueType() == MVT::f32) {
        Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
        DCI.AddToWorklist(Val.getNode());
      }
      Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
      DCI.AddToWorklist(Val.getNode());

      SDValue Ops[] = {
        N->getOperand(0), Val, N->getOperand(2),
        DAG.getValueType(N->getOperand(1).getValueType())
      };

      Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops,
              cast<StoreSDNode>(N)->getMemoryVT(),
              cast<StoreSDNode>(N)->getMemOperand());
      DCI.AddToWorklist(Val.getNode());
      return Val;
    }
    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
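    // E.g. (illustrative only): (store (bswap i32:x)) becomes a single stwbrx
    // instruction, and an i16 bswap store becomes sthbrx.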
    if (cast<StoreSDNode>(N)->isUnindexed() &&
        N->getOperand(1).getOpcode() == ISD::BSWAP &&
        N->getOperand(1).getNode()->hasOneUse() &&
        (N->getOperand(1).getValueType() == MVT::i32 ||
         N->getOperand(1).getValueType() == MVT::i16 ||
         (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
          N->getOperand(1).getValueType() == MVT::i64))) {
      SDValue BSwapOp = N->getOperand(1).getOperand(0);
      // Do an any-extend to 32-bits if this is a half-word input.
      if (BSwapOp.getValueType() == MVT::i16)
        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);

      SDValue Ops[] = {
        N->getOperand(0), BSwapOp, N->getOperand(2),
        DAG.getValueType(N->getOperand(1).getValueType())
      };
      return
        DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
                                Ops, cast<StoreSDNode>(N)->getMemoryVT(),
                                cast<StoreSDNode>(N)->getMemOperand());
    }

    // For little endian, VSX stores require generating xxswapd/stxvd2x.
    EVT VT = N->getOperand(1).getValueType();
    if (VT.isSimple()) {
      MVT StoreVT = VT.getSimpleVT();
      if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
          (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
           StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
        return expandVSXStoreForLE(N, DCI);
    }
    break;
  }
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT VT = LD->getValueType(0);

    // For little endian, VSX loads require generating lxvd2x/xxswapd.
    if (VT.isSimple()) {
      MVT LoadVT = VT.getSimpleVT();
      if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
          (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
           LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
        return expandVSXLoadForLE(N, DCI);
    }

    EVT MemVT = LD->getMemoryVT();
    Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
    unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
    Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
    unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
    if (LD->isUnindexed() && VT.isVector() &&
        ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
          // P8 and later hardware should just use LOAD.
          !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
                                       VT == MVT::v4i32 || VT == MVT::v4f32)) ||
         (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
          LD->getAlignment() >= ScalarABIAlignment)) &&
        LD->getAlignment() < ABIAlignment) {
      // This is a type-legal unaligned Altivec or QPX load.
      SDValue Chain = LD->getChain();
      SDValue Ptr = LD->getBasePtr();
      bool isLittleEndian = Subtarget.isLittleEndian();
      // This implements the loading of unaligned vectors as described in
      // the venerable Apple Velocity Engine overview. Specifically:
      // https://developer.apple.com/hardwaredrivers/ve/alignment.html
      // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
      //
      // The general idea is to expand a sequence of one or more unaligned
      // loads into an alignment-based permutation-control instruction (lvsl
      // or lvsr), a series of regular vector loads (which always truncate
      // their input address to an aligned address), and a series of
      // permutations. The results of these permutations are the requested
      // loaded values. The trick is that the last "extra" load is not taken
      // from the address you might suspect (sizeof(vector) bytes after the
      // last requested load), but rather sizeof(vector) - 1 bytes after the
      // last requested vector. The point of this is to avoid a page fault if
      // the base address happened to be aligned. This works because if the
      // base address is aligned, then adding less than a full vector length
      // will cause the last vector in the sequence to be (re)loaded.
      // Otherwise, the next vector will be fetched as you might suspect was
      // necessary.
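      //
      // Worked example (illustrative only): for a 16-byte vector load from
      // address 0x1008, the base lvx truncates to 0x1000 and the extra load
      // reads from 0x1008 + 15 = 0x1017, which truncates to 0x1010; the
      // permutation then extracts bytes 8..23 of the 32-byte concatenation.
      // Had the address been exactly 0x1000, the extra load would read from
      // 0x100f, truncating back to 0x1000 and touching no new page.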
      // We might be able to reuse the permutation generation from
      // a different base address offset from this one by an aligned amount.
      // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
      // optimization later.
      Intrinsic::ID Intr, IntrLD, IntrPerm;
      MVT PermCntlTy, PermTy, LDTy;
      if (Subtarget.hasAltivec()) {
        Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr :
                                Intrinsic::ppc_altivec_lvsl;
        IntrLD = Intrinsic::ppc_altivec_lvx;
        IntrPerm = Intrinsic::ppc_altivec_vperm;
        PermCntlTy = MVT::v16i8;
        PermTy = MVT::v4i32;
        LDTy = MVT::v4i32;
      } else {
        Intr =   MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
                                       Intrinsic::ppc_qpx_qvlpcls;
        IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
                                       Intrinsic::ppc_qpx_qvlfs;
        IntrPerm = Intrinsic::ppc_qpx_qvfperm;
        PermCntlTy = MVT::v4f64;
        PermTy = MVT::v4f64;
        LDTy = MemVT.getSimpleVT();
      }

      SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);

      // Create the new MMO for the new base load. It is like the original MMO,
      // but represents an area in memory almost twice the vector size centered
      // on the original address. If the address is unaligned, we might start
      // reading up to (sizeof(vector)-1) bytes below the address of the
      // original unaligned load.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineMemOperand *BaseMMO =
        MF.getMachineMemOperand(LD->getMemOperand(), -MemVT.getStoreSize()+1,
                                2*MemVT.getStoreSize()-1);

      // Create the new base load.
      SDValue LDXIntID =
        DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
      SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
      SDValue BaseLoad =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                DAG.getVTList(PermTy, MVT::Other),
                                BaseLoadOps, LDTy, BaseMMO);

      // Note that the value of IncOffset (which is provided to the next
      // load's pointer info offset value, and thus used to calculate the
      // alignment), and the value of IncValue (which is actually used to
      // increment the pointer value) are different! This is because we
      // require the next load to appear to be aligned, even though it
      // is actually offset from the base pointer by a lesser amount.
      int IncOffset = VT.getSizeInBits() / 8;
      int IncValue = IncOffset;
      // Walk (both up and down) the chain looking for another load at the real
      // (aligned) offset (the alignment of the other load does not matter in
      // this case). If found, then do not use the offset reduction trick, as
      // that will prevent the loads from being later combined (as they would
      // otherwise be duplicates).
      if (!findConsecutiveLoad(LD, DAG))
        --IncValue;
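      // E.g. (illustrative only): for a 16-byte vector with no other load at
      // the true aligned offset, IncValue becomes 15 while IncOffset stays 16.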
      SDValue Increment =
        DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);

      MachineMemOperand *ExtraMMO =
        MF.getMachineMemOperand(LD->getMemOperand(),
                                1, 2*MemVT.getStoreSize()-1);
      SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
      SDValue ExtraLoad =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                DAG.getVTList(PermTy, MVT::Other),
                                ExtraLoadOps, LDTy, ExtraMMO);

      SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
        BaseLoad.getValue(1), ExtraLoad.getValue(1));

      // Because vperm has a big-endian bias, we must reverse the order
      // of the input vectors and complement the permute control vector
      // when generating little endian code. We have already handled the
      // latter by using lvsr instead of lvsl, so just reverse BaseLoad
      // and ExtraLoad here.
      SDValue Perm;
      if (isLittleEndian)
        Perm = BuildIntrinsicOp(IntrPerm,
                                ExtraLoad, BaseLoad, PermCntl, DAG, dl);
      else
        Perm = BuildIntrinsicOp(IntrPerm,
                                BaseLoad, ExtraLoad, PermCntl, DAG, dl);

      if (VT != PermTy)
        Perm = Subtarget.hasAltivec() ?
                 DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
                 DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
                             DAG.getTargetConstant(1, dl, MVT::i64));
                             // The second argument is 1 because this rounding
                             // is always exact.

      // The output of the permutation is our loaded result, the TokenFactor is
      // our new chain.
      DCI.CombineTo(N, Perm, TF);
      return SDValue(N, 0);
    }
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    bool isLittleEndian = Subtarget.isLittleEndian();
    unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
                                         : Intrinsic::ppc_altivec_lvsl);
    if ((IID == Intr ||
         IID == Intrinsic::ppc_qpx_qvlpcld ||
         IID == Intrinsic::ppc_qpx_qvlpcls) &&
        N->getOperand(1)->getOpcode() == ISD::ADD) {
      SDValue Add = N->getOperand(1);

      int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
                 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;

      if (DAG.MaskedValueIsZero(
            Add->getOperand(1),
            APInt::getAllOnesValue(Bits /* alignment */)
              .zext(
                Add.getValueType().getScalarType().getSizeInBits()))) {
        SDNode *BasePtr = Add->getOperand(0).getNode();
        for (SDNode::use_iterator UI = BasePtr->use_begin(),
             UE = BasePtr->use_end();
             UI != UE; ++UI) {
          if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
              cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
            // We've found another LVSL/LVSR, and this address is an aligned
            // multiple of that one. The results will be the same, so use the
            // one we've just found instead.
            return SDValue(*UI, 0);
          }
        }
      }

      if (isa<ConstantSDNode>(Add->getOperand(1))) {
        SDNode *BasePtr = Add->getOperand(0).getNode();
        for (SDNode::use_iterator UI = BasePtr->use_begin(),
             UE = BasePtr->use_end(); UI != UE; ++UI) {
          if (UI->getOpcode() == ISD::ADD &&
              isa<ConstantSDNode>(UI->getOperand(1)) &&
              (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
               cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
              (1ULL << Bits) == 0) {
            SDNode *OtherAdd = *UI;
            for (SDNode::use_iterator VI = OtherAdd->use_begin(),
                 VE = OtherAdd->use_end(); VI != VE; ++VI) {
              if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
                  cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() ==
                    IID) {
                return SDValue(*VI, 0);
              }
            }
          }
        }
      }
    }
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    // For little endian, VSX loads require generating lxvd2x/xxswapd.
    if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
      default:
        break;
      case Intrinsic::ppc_vsx_lxvw4x:
      case Intrinsic::ppc_vsx_lxvd2x:
        return expandVSXLoadForLE(N, DCI);
      }
    }
    break;
  }
  case ISD::INTRINSIC_VOID: {
    // For little endian, VSX stores require generating xxswapd/stxvd2x.
    if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
      default:
        break;
      case Intrinsic::ppc_vsx_stxvw4x:
      case Intrinsic::ppc_vsx_stxvd2x:
        return expandVSXStoreForLE(N, DCI);
      }
    }
    break;
  }
  case ISD::BSWAP:
    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
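    // E.g. (illustrative only): (i32 (bswap (load p))) becomes a single lwbrx
    // byte-reversed load, with the original load's chain preserved.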
    if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
        N->getOperand(0).hasOneUse() &&
        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
         (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
          N->getValueType(0) == MVT::i64))) {
      SDValue Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);
      // Create the byte-swapping load.
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr(),  // Ptr
        DAG.getValueType(N->getValueType(0)) // VT
      };
      SDValue BSLoad =
        DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
                                DAG.getVTList(N->getValueType(0) == MVT::i64 ?
                                              MVT::i64 : MVT::i32, MVT::Other),
                                Ops, LD->getMemoryVT(), LD->getMemOperand());

      // If this is an i16 load, insert the truncate.
      SDValue ResVal = BSLoad;
      if (N->getValueType(0) == MVT::i16)
        ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);

      // First, combine the bswap away. This makes the value produced by the
      // load dead.
      DCI.CombineTo(N, ResVal);

      // Next, combine the load away; we give it a bogus result value but a
      // real chain result. The result value is dead because the bswap is dead.
      DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDValue(N, 0);
    }
    break;
  case PPCISD::VCMP: {
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6 and
    // a normal output).
    if (!N->getOperand(0).hasOneUse() &&
        !N->getOperand(1).hasOneUse() &&
        !N->getOperand(2).hasOneUse()) {

      // Scan all of the users of the LHS, looking for VCMPo's that match.
      SDNode *VCMPoNode = nullptr;

      SDNode *LHSN = N->getOperand(0).getNode();
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
           UI != E; ++UI)
        if (UI->getOpcode() == PPCISD::VCMPo &&
            UI->getOperand(1) == N->getOperand(1) &&
            UI->getOperand(2) == N->getOperand(2) &&
            UI->getOperand(0) == N->getOperand(0)) {
          VCMPoNode = *UI;
          break;
        }

      // If there is no VCMPo node, or if the flag value has a single use,
      // don't transform this.
      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
        break;

      // Look at the (necessarily single) use of the flag value. If it has a
      // chain, this transformation is more complex. Note that multiple things
      // could use the value result, which we should ignore.
      SDNode *FlagUser = nullptr;
      for (SDNode::use_iterator UI = VCMPoNode->use_begin();
           FlagUser == nullptr; ++UI) {
        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
        SDNode *User = *UI;
        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
          if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
            FlagUser = User;
            break;
          }
        }
      }

      // If the user is a MFOCRF instruction, we know this is safe.
      // Otherwise we give up for right now.
      if (FlagUser->getOpcode() == PPCISD::MFOCRF)
        return SDValue(VCMPoNode, 0);
    }
    break;
  }
  case ISD::BRCOND: {
    SDValue Cond = N->getOperand(1);
    SDValue Target = N->getOperand(2);

    if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero) {

      // We now need to make the intrinsic dead (it cannot be instruction
      // selected).
      DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
      assert(Cond.getNode()->hasOneUse() &&
             "Counter decrement has more than one use");

      return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
                         N->getOperand(0), Target);
    }
  }
  break;
  case ISD::BR_CC: {
    // If this is a branch on an altivec predicate comparison, lower this so
    // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
    // lowering is done pre-legalize, because the legalizer lowers the predicate
    // compare down to code that is difficult to reassemble.
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);

    // Sometimes the promoted value of the intrinsic is ANDed with some
    // non-zero value. If so, pass-through the AND to get to the intrinsic.
    if (LHS.getOpcode() == ISD::AND &&
        LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero &&
        isa<ConstantSDNode>(LHS.getOperand(1)) &&
        !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()->
          isZero())
      LHS = LHS.getOperand(0);

    if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero &&
        isa<ConstantSDNode>(RHS)) {
      assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
             "Counter decrement comparison is not EQ or NE");

      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      bool isBDNZ = (CC == ISD::SETEQ && Val) ||
                    (CC == ISD::SETNE && !Val);

      // We now need to make the intrinsic dead (it cannot be instruction
      // selected).
      DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
      assert(LHS.getNode()->hasOneUse() &&
             "Counter decrement has more than one use");

      return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
                         N->getOperand(0), N->getOperand(4));
    }

    int CompareOpc;
    bool isDot;
    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
        getAltivecCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
      assert(isDot && "Can't compare against a vector result!");

      // If this is a comparison against something other than 0/1, then we know
      // that the condition is never/always true.
      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      if (Val != 0 && Val != 1) {
        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
          return N->getOperand(0);
        // Always !=, turn it into an unconditional branch.
        return DAG.getNode(ISD::BR, dl, MVT::Other,
                           N->getOperand(0), N->getOperand(4));
      }

      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);

      // Create the PPCISD altivec 'dot' comparison node.
      SDValue Ops[] = {
        LHS.getOperand(2),  // LHS of compare
        LHS.getOperand(3),  // RHS of compare
        DAG.getConstant(CompareOpc, dl, MVT::i32)
      };
      EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
      SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

      // Unpack the result based on how the target uses it.
      PPC::Predicate CompOpc;
      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
      default:  // Can't happen, don't crash on invalid number though.
      case 0:   // Branch on the value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
        break;
      case 1:   // Branch on the inverted value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
        break;
      case 2:   // Branch on the value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
        break;
      case 3:   // Branch on the inverted value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
        break;
      }

      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
                         DAG.getConstant(CompOpc, dl, MVT::i32),
                         DAG.getRegister(PPC::CR6, MVT::i32),
                         N->getOperand(4), CompNode.getValue(1));
    }
    break;
  }
  }

  return SDValue();
}

SDValue
PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                 SelectionDAG &DAG,
                                 std::vector<SDNode *> *Created) const {
  // fold (sdiv X, pow2)
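  // A sketch of the expansion (illustrative only): sdiv X, 4 becomes
  // PPCISD::SRA_ADDZE with a shift amount of 2, i.e. an arithmetic shift
  // right whose carry is added back via addze to round toward zero; for
  // sdiv X, -4 the result is additionally negated below.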
  EVT VT = N->getValueType(0);
  if (VT == MVT::i64 && !Subtarget.isPPC64())
    return SDValue();
  if ((VT != MVT::i32 && VT != MVT::i64) ||
      !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);

  bool IsNegPow2 = (-Divisor).isPowerOf2();
  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
  SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);

  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
  if (Created)
    Created->push_back(Op.getNode());

  if (IsNegPow2) {
    Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
    if (Created)
      Created->push_back(Op.getNode());
  }

  return Op;
}
//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      APInt &KnownZero,
                                                      APInt &KnownOne,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
      KnownZero = 0xFFFF0000;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpequd_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtsd_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
    case Intrinsic::ppc_altivec_vcmpgtud_p:
      KnownZero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}
unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8: {
    if (!ML)
      break;

    const PPCInstrInfo *TII = Subtarget.getInstrInfo();

    // For small loops (between 5 and 8 instructions), align to a 32-byte
    // boundary so that the entire loop fits in one instruction-cache line.
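    // E.g. (illustrative only): a 6-instruction loop is 24 bytes, so the code
    // below returns 5 (a log2 value, i.e. a 32-byte boundary) and the whole
    // body lands in a single 32-byte chunk.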
    uint64_t LoopSize = 0;
    for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
      for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J)
        LoopSize += TII->GetInstSizeInBytes(J);

    if (LoopSize > 16 && LoopSize <= 32)
      return 5;

    break;
  }
  }

  return TargetLowering::getPrefLoopAlignment(ML);
}
/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    case 'Z':
      // FIXME: While Z does indicate a memory constraint, it specifically
      // indicates an r+r address (used in conjunction with the 'y' modifier
      // in the replacement string). Currently, we're forcing the base
      // register to be r0 in the asm printer (which is interpreted as zero)
      // and forming the complete address in the second register. This is
      // suboptimal.
      return C_Memory;
    }
  } else if (Constraint == "wc") { // individual CR bits.
    return C_RegisterClass;
  } else if (Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf" || Constraint == "ws") {
    return C_RegisterClass; // VSX registers.
  }
  return TargetLowering::getConstraintType(Constraint);
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
PPCTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();

  // Look at the constraint type.
  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
    return CW_Register; // an individual CR bit.
  else if ((StringRef(constraint) == "wa" ||
            StringRef(constraint) == "wd" ||
            StringRef(constraint) == "wf") &&
           type->isVectorTy())
    return CW_Register;
  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
    return CW_Register;

  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
  case 'b':
    if (type->isIntegerTy())
      weight = CW_Register;
    break;
  case 'f':
    if (type->isFloatTy())
      weight = CW_Register;
    break;
  case 'd':
    if (type->isDoubleTy())
      weight = CW_Register;
    break;
  case 'v':
    if (type->isVectorTy())
      weight = CW_Register;
    break;
  case 'y':
    weight = CW_Register;
    break;
  case 'Z':
    weight = CW_Memory;
    break;
  }
  return weight;
}
std::pair<unsigned, const TargetRegisterClass *>
PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
    case 'r':   // R0-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RCRegClass);
      return std::make_pair(0U, &PPC::GPRCRegClass);
    case 'f':
      if (VT == MVT::f32 || VT == MVT::i32)
        return std::make_pair(0U, &PPC::F4RCRegClass);
      if (VT == MVT::f64 || VT == MVT::i64)
        return std::make_pair(0U, &PPC::F8RCRegClass);
      if (VT == MVT::v4f64 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QFRCRegClass);
      if (VT == MVT::v4f32 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QSRCRegClass);
      break;
    case 'v':
      if (VT == MVT::v4f64 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QFRCRegClass);
      if (VT == MVT::v4f32 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QSRCRegClass);
      return std::make_pair(0U, &PPC::VRRCRegClass);
    case 'y':   // crrc
      return std::make_pair(0U, &PPC::CRRCRegClass);
    }
  } else if (Constraint == "wc") { // an individual CR bit.
    return std::make_pair(0U, &PPC::CRBITRCRegClass);
  } else if (Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf") {
    return std::make_pair(0U, &PPC::VSRCRegClass);
  } else if (Constraint == "ws") {
    if (VT == MVT::f32)
      return std::make_pair(0U, &PPC::VSSRCRegClass);
    return std::make_pair(0U, &PPC::VSFRCRegClass);
  }

  std::pair<unsigned, const TargetRegisterClass *> R =
    TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
  // register class.
  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
      PPC::GPRCRegClass.contains(R.first))
    return std::make_pair(TRI->getMatchingSuperReg(R.first,
                            PPC::sub_32, &PPC::G8RCRegClass),
                          &PPC::G8RCRegClass);

  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
  if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
    R.first = PPC::CR0;
    R.second = &PPC::CRRCRegClass;
  }

  return R;
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     std::string &Constraint,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;

  // Only support length 1 constraints.
  if (Constraint.length() > 1) return;

  char Letter = Constraint[0];
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
    if (!CST) return; // Must be an immediate to match.
    SDLoc dl(Op);
    int64_t Value = CST->getSExtValue();
    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
                         // numbers are printed as such.
    switch (Letter) {
    default: llvm_unreachable("Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
      if (isInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
      if (isShiftedUInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
      if (isShiftedInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      if (isUInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'M':  // "M" is a constant that is greater than 31.
      if (Value > 31)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      if (Value > 0 && isPowerOf2_64(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'O':  // "O" is the constant zero.
      if (Value == 0)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      if (isInt<16>(-Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    }
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS) const {
  // PPC does not allow r+i addressing modes for vectors!
  if (Ty->isVectorTy() && AM.BaseOffs != 0)
    return false;

  // PPC allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // PPC only supports r+r,
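  // E.g. (illustrative only): "r+i" with an in-range offset and plain "r+r"
  // are accepted below, while "r+r+i" and scaled forms like "2*r+i" are
  // rejected.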
  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default:
    // No other scales are supported.
    return false;
  }

  return true;
}
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setLRStoreRequired();
  bool isPPC64 = Subtarget.isPPC64();
  auto PtrVT = getPointerTy(MF.getDataLayout());

  if (Depth > 0) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset =
      DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
                      isPPC64 ? MVT::i64 : MVT::i32);
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
                       MachinePointerInfo(), false, false, false, 0);
  }

  // Just load the return address off the stack.
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
                     MachinePointerInfo(), false, false, false, 0);
}
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setFrameAddressIsTaken(true);

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;

  // Naked functions never have a frame pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned FrameReg;
  if (MF.getFunction()->hasFnAttribute(Attribute::Naked))
    FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
  else
    FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;

  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
                                         PtrVT);
  while (Depth--)
    FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
                            FrameAddr, MachinePointerInfo(), false, false,
                            false, 0);
  return FrameAddr;
}
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
                                              SelectionDAG &DAG) const {
  bool isPPC64 = Subtarget.isPPC64();
  bool isDarwinABI = Subtarget.isDarwinABI();

  if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
      (!isPPC64 && VT != MVT::i32))
    report_fatal_error("Invalid register global variable type");

  bool is64Bit = isPPC64 && VT == MVT::i64;
  unsigned Reg = StringSwitch<unsigned>(RegName)
                   .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
                   .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
                   .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
                                  (is64Bit ? PPC::X13 : PPC::R13))
                   .Default(0);

  if (!Reg)
    report_fatal_error("Invalid register name global variable");

  return Reg;
}

bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The PowerPC target isn't yet aware of offsets.
  return false;
}

bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::ppc_qpx_qvlfd:
  case Intrinsic::ppc_qpx_qvlfs:
  case Intrinsic::ppc_qpx_qvlfcd:
  case Intrinsic::ppc_qpx_qvlfcs:
  case Intrinsic::ppc_qpx_qvlfiwa:
  case Intrinsic::ppc_qpx_qvlfiwz:
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
  case Intrinsic::ppc_altivec_lvebx:
  case Intrinsic::ppc_altivec_lvehx:
  case Intrinsic::ppc_altivec_lvewx:
  case Intrinsic::ppc_vsx_lxvd2x:
  case Intrinsic::ppc_vsx_lxvw4x: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8; break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16; break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32; break;
    case Intrinsic::ppc_vsx_lxvd2x:
      VT = MVT::v2f64; break;
    case Intrinsic::ppc_qpx_qvlfd:
      VT = MVT::v4f64; break;
    case Intrinsic::ppc_qpx_qvlfs:
      VT = MVT::v4f32; break;
    case Intrinsic::ppc_qpx_qvlfcd:
      VT = MVT::v2f64; break;
    case Intrinsic::ppc_qpx_qvlfcs:
      VT = MVT::v2f32; break;
    default:
      VT = MVT::v4i32; break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = 1;
    Info.vol = false;
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  case Intrinsic::ppc_qpx_qvlfda:
  case Intrinsic::ppc_qpx_qvlfsa:
  case Intrinsic::ppc_qpx_qvlfcda:
  case Intrinsic::ppc_qpx_qvlfcsa:
  case Intrinsic::ppc_qpx_qvlfiwaa:
  case Intrinsic::ppc_qpx_qvlfiwza: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_qpx_qvlfda:
      VT = MVT::v4f64; break;
    case Intrinsic::ppc_qpx_qvlfsa:
      VT = MVT::v4f32; break;
    case Intrinsic::ppc_qpx_qvlfcda:
      VT = MVT::v2f64; break;
    case Intrinsic::ppc_qpx_qvlfcsa:
      VT = MVT::v2f32; break;
    default:
      VT = MVT::v4i32; break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.size = VT.getStoreSize();
    Info.align = 1;
    Info.vol = false;
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  case Intrinsic::ppc_qpx_qvstfd:
  case Intrinsic::ppc_qpx_qvstfs:
  case Intrinsic::ppc_qpx_qvstfcd:
  case Intrinsic::ppc_qpx_qvstfcs:
  case Intrinsic::ppc_qpx_qvstfiw:
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
  case Intrinsic::ppc_altivec_stvebx:
  case Intrinsic::ppc_altivec_stvehx:
  case Intrinsic::ppc_altivec_stvewx:
  case Intrinsic::ppc_vsx_stxvd2x:
  case Intrinsic::ppc_vsx_stxvw4x: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8; break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16; break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32; break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64; break;
    case Intrinsic::ppc_qpx_qvstfd:
      VT = MVT::v4f64; break;
    case Intrinsic::ppc_qpx_qvstfs:
      VT = MVT::v4f32; break;
    case Intrinsic::ppc_qpx_qvstfcd:
      VT = MVT::v2f64; break;
    case Intrinsic::ppc_qpx_qvstfcs:
      VT = MVT::v2f32; break;
    default:
      VT = MVT::v4i32; break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = 1;
    Info.vol = false;
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  case Intrinsic::ppc_qpx_qvstfda:
  case Intrinsic::ppc_qpx_qvstfsa:
  case Intrinsic::ppc_qpx_qvstfcda:
  case Intrinsic::ppc_qpx_qvstfcsa:
  case Intrinsic::ppc_qpx_qvstfiwa: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_qpx_qvstfda:
      VT = MVT::v4f64; break;
    case Intrinsic::ppc_qpx_qvstfsa:
      VT = MVT::v4f32; break;
    case Intrinsic::ppc_qpx_qvstfcda:
      VT = MVT::v2f64; break;
    case Intrinsic::ppc_qpx_qvstfcsa:
      VT = MVT::v2f32; break;
    default:
      VT = MVT::v4i32; break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.size = VT.getStoreSize();
    Info.align = 1;
    Info.vol = false;
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  default:
    break;
  }

  return false;
}
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero, that means the destination can satisfy any
/// alignment constraint. Similarly, if SrcAlign is zero, there is no need to
/// check it against an alignment requirement, probably because the source
/// does not need to be loaded. If 'IsMemset' is true, that means it's
/// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
/// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so
/// it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
                                           unsigned DstAlign, unsigned SrcAlign,
                                           bool IsMemset, bool ZeroMemset,
                                           bool MemcpyStrSrc,
                                           MachineFunction &MF) const {
  if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
    const Function *F = MF.getFunction();
    // When expanding a memset, require at least two QPX instructions to cover
    // the cost of loading the value to be stored from the constant pool.
    if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
        (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
        !F->hasFnAttribute(Attribute::NoImplicitFloat)) {
      return MVT::v4f64;
    }

    // We should use Altivec/VSX loads and stores when available. For unaligned
    // addresses, unaligned VSX loads are only fast starting with the P8.
    if (Subtarget.hasAltivec() && Size >= 16 &&
        (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
         ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
      return MVT::v4i32;
  }

  if (Subtarget.isPPC64()) {
    return MVT::i64;
  }

  return MVT::i32;
}

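// As an illustration of the QPX heuristic above: a 64-byte memset with
// 32-byte-aligned operands returns MVT::v4f64, so the expansion issues two
// 32-byte vector stores, amortizing the constant-pool load of the value
// being stored.
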
/// \brief Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                          Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0 || BitSize > 64)
    return false;
  return true;
}

bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}

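// Truncation from i64 to i32 is free here because the i32 result is simply
// the low 32 bits of the same 64-bit GPR; no instruction is required.
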
bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}

bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Generally speaking, zexts are not free, but they are free when they can be
  // folded with other operations.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  // FIXME: Add other cases...
  //  - 32-bit shifts with a zext to i64
  //  - zext after ctlz, bswap, etc.
  //  - zext after and by a constant mask

  return TargetLowering::isZExtFree(Val, VT2);
}

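// The load case above is free because PPC's narrow loads (lbz, lhz, and lwz
// on 64-bit targets) zero-extend into the full register by definition, so
// the explicit zext folds into the load.
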
bool PPCTargetLowering::isFPExtFree(EVT VT) const {
  assert(VT.isFloatingPoint());
  // Single-precision values are kept in double-precision format in the PPC
  // FPRs, so extending f32 to f64 requires no work.
  return true;
}

bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);
}

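// These bounds correspond to the PPC compare-immediate forms: cmpwi/cmpdi
// take a signed 16-bit immediate, while cmplwi/cmpldi take an unsigned
// 16-bit immediate.
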
bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);
}

bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                       unsigned,
                                                       unsigned,
                                                       bool *Fast) const {
  if (DisablePPCUnaligned)
    return false;

  // PowerPC supports unaligned memory access for simple non-vector types.
  // Although accessing unaligned addresses is not as efficient as accessing
  // aligned addresses, it is generally more efficient than manual expansion,
  // and generally only traps for software emulation when crossing page
  // boundaries.

  if (!VT.isSimple())
    return false;

  if (VT.getSimpleVT().isVector()) {
    if (Subtarget.hasVSX()) {
      if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
          VT != MVT::v4f32 && VT != MVT::v4i32)
        return false;
    } else {
      return false;
    }
  }

  if (VT == MVT::ppcf128)
    return false;

  if (Fast)
    *Fast = true;

  return true;
}

bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  default:
    break;
  }

  return false;
}

const MCPhysReg *
PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
  // LR is a callee-save register, but we must treat it as clobbered by any call
  // site. Hence we include LR in the scratch registers, which are in turn added
  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
  // to CTR, which is used by any indirect call.
  static const MCPhysReg ScratchRegs[] = {
    PPC::X12, PPC::LR8, PPC::CTR8, 0
  };

  return ScratchRegs;
}

bool
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
  if (VT == MVT::v2i64)
    return false;

  if (Subtarget.hasQPX()) {
    if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1)
      return true;
  }

  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
}

Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
  if (DisableILPPref || Subtarget.enableMachineScheduler())
    return TargetLowering::getSchedulingPreference(N);

  return Sched::ILP;
}

// Create a fast isel object.
FastISel *
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) const {
  return PPC::createFastISel(FuncInfo, LibInfo);
}