[C++] Use 'nullptr'.

[oota-llvm.git] / lib / Target / PowerPC / PPCISelLowering.cpp
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index cec0b8f3c7cdf41c109599d030d4d82523df1e80..119911626d6762c23e521b7d0156599c9d57f138 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -16,6 +16,7 @@
  #include "PPCMachineFunctionInfo.h"
  #include "PPCPerfectShuffle.h"
  #include "PPCTargetMachine.h"
+#include "PPCTargetObjectFile.h"
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/CodeGen/CallingConvLower.h"
  #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -36,21 +37,6 @@
  #include "llvm/Target/TargetOptions.h"
  using namespace llvm;
  
-static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                       CCValAssign::LocInfo &LocInfo,
-                                       ISD::ArgFlagsTy &ArgFlags,
-                                       CCState &State);
-static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
-                                              MVT &LocVT,
-                                              CCValAssign::LocInfo &LocInfo,
-                                              ISD::ArgFlagsTy &ArgFlags,
-                                              CCState &State);
-static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
-                                                MVT &LocVT,
-                                                CCValAssign::LocInfo &LocInfo,
-                                                ISD::ArgFlagsTy &ArgFlags,
-                                                CCState &State);
-
  static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
  cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
  
@@ -60,18 +46,22 @@ cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hi
  static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
  cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
  
+// FIXME: Remove this once the ANDI glue bug has been fixed!
+extern cl::opt<bool> ANDIGlueBug;
+
  static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
    if (TM.getSubtargetImpl()->isDarwin())
      return new TargetLoweringObjectFileMachO();
  
+  if (TM.getSubtargetImpl()->isSVR4ABI())
+    return new PPC64LinuxTargetObjectFile();
+
    return new TargetLoweringObjectFileELF();
  }
  
  PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
    const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
-  PPCRegInfo = TM.getRegisterInfo();
-  PPCII = TM.getInstrInfo();
  
    setPow2DivIsCheap();
  
@@ -107,6 +97,39 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  
+  if (Subtarget->useCRBits()) {
+    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+    if (isPPC64 || Subtarget->hasFPCVT()) {
+      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
+      AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
+                         isPPC64 ? MVT::i64 : MVT::i32);
+      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
+      AddPromotedToType (ISD::UINT_TO_FP, MVT::i1, 
+                         isPPC64 ? MVT::i64 : MVT::i32);
+    } else {
+      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
+      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
+    }
+
+    // PowerPC does not support direct load / store of condition registers
+    setOperationAction(ISD::LOAD, MVT::i1, Custom);
+    setOperationAction(ISD::STORE, MVT::i1, Custom);
+
+    // FIXME: Remove this once the ANDI glue bug is fixed:
+    if (ANDIGlueBug)
+      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
+
+    setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+    setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+    setTruncStoreAction(MVT::i64, MVT::i1, Expand);
+    setTruncStoreAction(MVT::i32, MVT::i1, Expand);
+    setTruncStoreAction(MVT::i16, MVT::i1, Expand);
+    setTruncStoreAction(MVT::i8, MVT::i1, Expand);
+
+    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
+  }
+
    // This is used in the ppcf128->int sequence.  Note it has different semantics
    // from FP_ROUND:  that rounds to nearest, this rounds to zero.
    setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
@@ -162,28 +185,24 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
          Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
      setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  
-  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
-  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+  if (Subtarget->hasFCPSGN()) {
+    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
+    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
+  } else {
+    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+  }
  
    if (Subtarget->hasFPRND()) {
      setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
      setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+    setOperationAction(ISD::FROUND, MVT::f64, Legal);
  
      setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
      setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
      setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
-
-    // frin does not implement "ties to even." Thus, this is safe only in
-    // fast-math mode.
-    if (TM.Options.UnsafeFPMath) {
-      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
-      setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
-
-      // These need to set FE_INEXACT, and use a custom inserter.
-      setOperationAction(ISD::FRINT, MVT::f64, Legal);
-      setOperationAction(ISD::FRINT, MVT::f32, Legal);
-    }
+    setOperationAction(ISD::FROUND, MVT::f32, Legal);
    }
  
    // PowerPC does not have BSWAP, CTPOP or CTTZ
@@ -208,21 +227,25 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    setOperationAction(ISD::ROTR, MVT::i32   , Expand);
    setOperationAction(ISD::ROTR, MVT::i64   , Expand);
  
-  // PowerPC does not have Select
-  setOperationAction(ISD::SELECT, MVT::i32, Expand);
-  setOperationAction(ISD::SELECT, MVT::i64, Expand);
-  setOperationAction(ISD::SELECT, MVT::f32, Expand);
-  setOperationAction(ISD::SELECT, MVT::f64, Expand);
+  if (!Subtarget->useCRBits()) {
+    // PowerPC does not have Select
+    setOperationAction(ISD::SELECT, MVT::i32, Expand);
+    setOperationAction(ISD::SELECT, MVT::i64, Expand);
+    setOperationAction(ISD::SELECT, MVT::f32, Expand);
+    setOperationAction(ISD::SELECT, MVT::f64, Expand);
+  }
  
    // PowerPC wants to turn select_cc of FP into fsel when possible.
    setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
  
    // PowerPC wants to optimize integer setcc a bit
-  setOperationAction(ISD::SETCC, MVT::i32, Custom);
+  if (!Subtarget->useCRBits())
+    setOperationAction(ISD::SETCC, MVT::i32, Custom);
  
    // PowerPC does not have BRCOND which requires SetCC
-  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+  if (!Subtarget->useCRBits())
+    setOperationAction(ISD::BRCOND, MVT::Other, Expand);
  
    setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
  
@@ -241,11 +264,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    // We cannot sextinreg(i1).  Expand to shifts.
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  
-  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
-  setOperationAction(ISD::EHSELECTION,   MVT::i64, Expand);
-  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
-  setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);
-
    // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
    // SjLj exception handling but a light-weight setjmp/longjmp replacement to
    // support continuation, user-level threading, and etc.. As a result, no
@@ -298,8 +316,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    } else
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
  
+  if (Subtarget->isSVR4ABI() && !isPPC64)
+    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
+    setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
+  else
+    setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
+
    // Use the default implementation.
-  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
    setOperationAction(ISD::VAEND             , MVT::Other, Expand);
    setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
    setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
@@ -309,6 +332,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    // We want to custom lower some of our intrinsics.
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  
+  // To handle counter-based loop conditions.
+  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
+
    // Comparisons that require checking two conditions.
    setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
    setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
@@ -407,6 +433,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
        setOperationAction(ISD::UDIV, VT, Expand);
        setOperationAction(ISD::UREM, VT, Expand);
        setOperationAction(ISD::FDIV, VT, Expand);
+      setOperationAction(ISD::FREM, VT, Expand);
        setOperationAction(ISD::FNEG, VT, Expand);
        setOperationAction(ISD::FSQRT, VT, Expand);
        setOperationAction(ISD::FLOG, VT, Expand);
@@ -458,7 +485,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
      setOperationAction(ISD::OR    , MVT::v4i32, Legal);
      setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
      setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
-    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
+    setOperationAction(ISD::SELECT, MVT::v4i32,
+                       Subtarget->useCRBits() ? Legal : Expand);
      setOperationAction(ISD::STORE , MVT::v4i32, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
@@ -477,7 +505,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
      setOperationAction(ISD::MUL, MVT::v4f32, Legal);
      setOperationAction(ISD::FMA, MVT::v4f32, Legal);
  
-    if (TM.Options.UnsafeFPMath) {
+    if (TM.Options.UnsafeFPMath || Subtarget->hasVSX()) {
        setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
        setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
      }
@@ -501,6 +529,86 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
      setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
      setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
      setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
+
+    setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
+    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
+
+    if (Subtarget->hasVSX()) {
+      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
+      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
+
+      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
+      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
+      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
+      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
+
+      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+
+      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
+      setOperationAction(ISD::FMA, MVT::v2f64, Legal);
+
+      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
+      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
+
+      setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
+      setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
+      setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
+      setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
+      setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
+
+      // Share the Altivec comparison restrictions.
+      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
+      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
+      setCondCodeAction(ISD::SETUGT, MVT::v2f64, Expand);
+      setCondCodeAction(ISD::SETUGE, MVT::v2f64, Expand);
+      setCondCodeAction(ISD::SETULT, MVT::v2f64, Expand);
+      setCondCodeAction(ISD::SETULE, MVT::v2f64, Expand);
+
+      setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
+      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
+
+      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
+      setOperationAction(ISD::STORE, MVT::v2f64, Legal);
+
+      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
+
+      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
+
+      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
+      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
+
+      // VSX v2i64 only supports non-arithmetic operations.
+      setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+      setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+
+      setOperationAction(ISD::SHL, MVT::v2i64, Expand);
+      setOperationAction(ISD::SRA, MVT::v2i64, Expand);
+      setOperationAction(ISD::SRL, MVT::v2i64, Expand);
+
+      setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+
+      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
+      AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
+      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
+      AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
+
+      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
+
+      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
+      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
+      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
+      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
+
+      // Vector operation legalization checks the result type of
+      // SIGN_EXTEND_INREG, overall legalization checks the inner type.
+      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
+      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
+      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
+      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
+
+      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
+    }
    }
  
    if (Subtarget->has64BitSupport()) {
@@ -514,7 +622,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
  
    setBooleanContents(ZeroOrOneBooleanContent);
-  setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
+  // Altivec instructions set fields to all zeros or all ones.
+  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  
    if (isPPC64) {
      setStackPointerRegisterToSaveRestore(PPC::X1);
@@ -528,9 +637,23 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
  
    // We have target-specific dag combine patterns for the following nodes:
    setTargetDAGCombine(ISD::SINT_TO_FP);
+  setTargetDAGCombine(ISD::LOAD);
    setTargetDAGCombine(ISD::STORE);
    setTargetDAGCombine(ISD::BR_CC);
+  if (Subtarget->useCRBits())
+    setTargetDAGCombine(ISD::BRCOND);
    setTargetDAGCombine(ISD::BSWAP);
+  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+
+  setTargetDAGCombine(ISD::SIGN_EXTEND);
+  setTargetDAGCombine(ISD::ZERO_EXTEND);
+  setTargetDAGCombine(ISD::ANY_EXTEND);
+
+  if (Subtarget->useCRBits()) {
+    setTargetDAGCombine(ISD::TRUNCATE);
+    setTargetDAGCombine(ISD::SETCC);
+    setTargetDAGCombine(ISD::SELECT_CC);
+  }
  
    // Use reciprocal estimates.
    if (TM.Options.UnsafeFPMath) {
@@ -552,6 +675,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
      setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
    }
  
+  // With 32 condition bits, we don't need to sink (and duplicate) compares
+  // aggressively in CodeGenPrep.
+  if (Subtarget->useCRBits())
+    setHasMultipleConditionRegisters();
+
    setMinFunctionAlignment(2);
    if (PPCSubTarget.isDarwin())
      setPrefFunctionAlignment(4);
@@ -563,7 +691,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
  
    setInsertFencesForAtomic(true);
  
-  setSchedulingPreference(Sched::Hybrid);
+  if (Subtarget->enableMachineScheduler())
+    setSchedulingPreference(Sched::Source);
+  else
+    setSchedulingPreference(Sched::Hybrid);
  
    computeRegisterProperties();
  
@@ -582,29 +713,52 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    }
  }
  
+/// getMaxByValAlign - Helper for getByValTypeAlignment: sets MaxAlign to 16 or
+/// 32 for wide vectors nested in Ty; stops once MaxAlign equals MaxMaxAlign.
+static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
+                             unsigned MaxMaxAlign) {
+  if (MaxAlign == MaxMaxAlign)
+    return;  // Already at the limit; nothing more to find.
+  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
+      MaxAlign = 32;
+    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
+      MaxAlign = 16;
+  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+    unsigned EltAlign = 0;
+    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
+    if (EltAlign > MaxAlign)
+      MaxAlign = EltAlign;
+  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
+    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+      unsigned EltAlign = 0;
+      getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
+      if (EltAlign > MaxAlign)
+        MaxAlign = EltAlign;
+      if (MaxAlign == MaxMaxAlign)
+        break;  // Limit reached; stop scanning fields.
+    }
+  }
+}
+
  /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
  /// function arguments in the caller parameter area.
  unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
-  const TargetMachine &TM = getTargetMachine();
    // Darwin passes everything on 4 byte boundary.
-  if (TM.getSubtarget<PPCSubtarget>().isDarwin())
+  if (PPCSubTarget.isDarwin())
      return 4;
  
    // 16byte and wider vectors are passed on 16byte boundary.
-  if (VectorType *VTy = dyn_cast<VectorType>(Ty))
-    if (VTy->getBitWidth() >= 128)
-      return 16;
-
    // The rest is 8 on PPC64 and 4 on PPC32 boundary.
-   if (PPCSubTarget.isPPC64())
-     return 8;
-
-  return 4;
+  unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4;
+  if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX())
+    getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16);
+  return Align;
  }
  
  const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
    switch (Opcode) {
-  default: return 0;
+  default: return nullptr;
    case PPCISD::FSEL:            return "PPCISD::FSEL";
    case PPCISD::FCFID:           return "PPCISD::FCFID";
    case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
@@ -633,7 +787,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
    case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
    case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
    case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
-  case PPCISD::MFCR:            return "PPCISD::MFCR";
+  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
    case PPCISD::VCMP:            return "PPCISD::VCMP";
    case PPCISD::VCMPo:           return "PPCISD::VCMPo";
    case PPCISD::LBRX:            return "PPCISD::LBRX";
@@ -641,6 +795,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
    case PPCISD::LARX:            return "PPCISD::LARX";
    case PPCISD::STCX:            return "PPCISD::STCX";
    case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
+  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
+  case PPCISD::BDZ:             return "PPCISD::BDZ";
    case PPCISD::MFFS:            return "PPCISD::MFFS";
    case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
    case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
@@ -649,6 +805,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
    case PPCISD::ADDIS_TOC_HA:    return "PPCISD::ADDIS_TOC_HA";
    case PPCISD::LD_TOC_L:        return "PPCISD::LD_TOC_L";
    case PPCISD::ADDI_TOC_L:      return "PPCISD::ADDI_TOC_L";
+  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
    case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
    case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
    case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
@@ -661,12 +818,13 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
    case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
    case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
    case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
+  case PPCISD::SC:              return "PPCISD::SC";
    }
  }
  
-EVT PPCTargetLowering::getSetCCResultType(EVT VT) const {
+EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
    if (!VT.isVector())
-    return MVT::i32;
+    return PPCSubTarget.useCRBits() ? MVT::i1 : MVT::i32;
    return VT.changeVectorElementTypeToInteger();
  }
  
@@ -732,8 +890,8 @@ bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
  ///
  static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                       unsigned LHSStart, unsigned RHSStart) {
-  assert(N->getValueType(0) == MVT::v16i8 &&
-         "PPC only supports shuffles by bytes!");
+  if (N->getValueType(0) != MVT::v16i8)
+    return false;
    assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
           "Unsupported merge size!");
  
@@ -770,8 +928,8 @@ bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
  /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
  /// amount, otherwise return -1.
  int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
-  assert(N->getValueType(0) == MVT::v16i8 &&
-         "PPC only supports shuffles by bytes!");
+  if (N->getValueType(0) != MVT::v16i8)
+    return -1;
  
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  
@@ -861,7 +1019,7 @@ unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
  /// the constant being splatted.  The ByteSize field indicates the number of
  /// bytes of each element [124] -> [bhw].
  SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
-  SDValue OpVal(0, 0);
+  SDValue OpVal(nullptr, 0);
  
    // If ByteSize of the splat is bigger than the element size of the
    // build_vector, then we have a case where we are checking for a splat where
@@ -880,7 +1038,7 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
        if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
  
  
-      if (UniquedVals[i&(Multiple-1)].getNode() == 0)
+      if (!UniquedVals[i&(Multiple-1)].getNode())
          UniquedVals[i&(Multiple-1)] = N->getOperand(i);
        else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
          return SDValue();  // no match.
@@ -895,21 +1053,21 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
      bool LeadingZero = true;
      bool LeadingOnes = true;
      for (unsigned i = 0; i != Multiple-1; ++i) {
-      if (UniquedVals[i].getNode() == 0) continue;  // Must have been undefs.
+      if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.
  
        LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
        LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
      }
      // Finally, check the least significant entry.
      if (LeadingZero) {
-      if (UniquedVals[Multiple-1].getNode() == 0)
+      if (!UniquedVals[Multiple-1].getNode())
          return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
        int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
        if (Val < 16)
          return DAG.getTargetConstant(Val, MVT::i32);  // 0,0,0,4 -> vspltisw(4)
      }
      if (LeadingOnes) {
-      if (UniquedVals[Multiple-1].getNode() == 0)
+      if (!UniquedVals[Multiple-1].getNode())
          return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
        int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
        if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
@@ -922,13 +1080,13 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
    // Check to see if this buildvec has a single non-undef value in its elements.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
-    if (OpVal.getNode() == 0)
+    if (!OpVal.getNode())
        OpVal = N->getOperand(i);
      else if (OpVal != N->getOperand(i))
        return SDValue();
    }
  
-  if (OpVal.getNode() == 0) return SDValue();  // All UNDEF: use implicit def.
+  if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.
  
    unsigned ValSizeInBytes = EltSize;
    uint64_t Value = 0;
@@ -1035,24 +1193,68 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
    return false;
  }
  
+// If we happen to be doing an i64 load or store into a stack slot that has
+// less than a 4-byte alignment, then the frame-index elimination may need to
+// use an indexed load or store instruction (because the offset may not be a
+// multiple of 4). The extra register needed to hold the offset comes from the
+// register scavenger, and it is possible that the scavenger will need to use
+// an emergency spill slot. As a result, we need to make sure that a spill slot
+// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
+// stack slot.
+static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
+  // FIXME: This does not handle the LWA case.
+  if (VT != MVT::i64)
+    return;
+
+  // NOTE: We'll exclude negative FIs here, which come from argument
+  // lowering, because there are no known test cases triggering this problem
+  // using packed structures (or similar). We can remove this exclusion if
+  // we find such a test case. The reason why this is so test-case driven is
+  // because this entire 'fixup' is only to prevent crashes (from the
+  // register scavenger) on not-really-valid inputs. For example, if we have:
+  //   %a = alloca i1
+  //   %b = bitcast i1* %a to i64*
+  //   store i64 %v, i64* %b
+  // then the store should really be marked as 'align 1', but is not. If it
+  // were marked as 'align 1' then the indexed form would have been
+  // instruction-selected initially, and the problem this 'fixup' is preventing
+  // won't happen regardless.
+  if (FrameIdx < 0)
+    return;
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  unsigned Align = MFI->getObjectAlignment(FrameIdx);
+  if (Align >= 4)
+    return;  // Slot is at least 4-byte aligned; no fixup needed.
+
+  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+  FuncInfo->setHasNonRISpills();
+}
+
  /// Returns true if the address N can be represented by a base register plus
  /// a signed 16-bit displacement [r+imm], and if it is not better
-/// represented as reg+reg.
+/// represented as reg+reg.  If Aligned is true, only accept displacements
+/// suitable for STD and friends, i.e. multiples of 4.
  bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                              SDValue &Base,
-                                            SelectionDAG &DAG) const {
+                                            SelectionDAG &DAG,
+                                            bool Aligned) const {
    // FIXME dl should come from parent load or store, not from address
-  DebugLoc dl = N.getDebugLoc();
+  SDLoc dl(N);
    // If this can be more profitably realized as r+r, fail.
    if (SelectAddressRegReg(N, Disp, Base, DAG))
      return false;
  
    if (N.getOpcode() == ISD::ADD) {
      short imm = 0;
-    if (isIntS16Immediate(N.getOperand(1), imm)) {
-      Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
+    if (isIntS16Immediate(N.getOperand(1), imm) &&
+        (!Aligned || (imm & 3) == 0)) {
+      Disp = DAG.getTargetConstant(imm, N.getValueType());
        if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        } else {
          Base = N.getOperand(0);
        }
@@ -1071,7 +1273,8 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
      }
    } else if (N.getOpcode() == ISD::OR) {
      short imm = 0;
-    if (isIntS16Immediate(N.getOperand(1), imm)) {
+    if (isIntS16Immediate(N.getOperand(1), imm) &&
+        (!Aligned || (imm & 3) == 0)) {
        // If this is an or of disjoint bitfields, we can codegen this as an add
        // (for better address arithmetic) if the LHS and RHS of the OR are
        // provably disjoint.
@@ -1082,7 +1285,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
          // If all of the bits are known zero on the LHS or RHS, the add won't
          // carry.
          Base = N.getOperand(0);
-        Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
+        Disp = DAG.getTargetConstant(imm, N.getValueType());
          return true;
        }
      }
@@ -1092,7 +1295,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
      // If this address fits entirely in a 16-bit sext immediate field, codegen
      // this as "d, 0"
      short Imm;
-    if (isIntS16Immediate(CN, Imm)) {
+    if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
        Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
        Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                               CN->getValueType(0));
@@ -1100,8 +1303,9 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
      }
  
      // Handle 32-bit sext immediates with LIS + addr mode.
-    if (CN->getValueType(0) == MVT::i32 ||
-        (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
+    if ((CN->getValueType(0) == MVT::i32 ||
+         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
+        (!Aligned || (CN->getZExtValue() & 3) == 0)) {
        int Addr = (int)CN->getZExtValue();
  
        // Otherwise, break this down into an LIS + disp.
@@ -1115,9 +1319,10 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
    }
  
    Disp = DAG.getTargetConstant(0, getPointerTy());
-  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
-  else
+    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
+  } else
      Base = N;
    return true;      // [r+0]
  }
@@ -1149,92 +1354,6 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
    return true;
  }
  
-/// SelectAddressRegImmShift - Returns true if the address N can be
-/// represented by a base register plus a signed 14-bit displacement
-/// [r+imm*4].  Suitable for use by STD and friends.
-bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
-                                                 SDValue &Base,
-                                                 SelectionDAG &DAG) const {
-  // FIXME dl should come from the parent load or store, not the address
-  DebugLoc dl = N.getDebugLoc();
-  // If this can be more profitably realized as r+r, fail.
-  if (SelectAddressRegReg(N, Disp, Base, DAG))
-    return false;
-
-  if (N.getOpcode() == ISD::ADD) {
-    short imm = 0;
-    if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
-      Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
-      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
-        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
-      } else {
-        Base = N.getOperand(0);
-      }
-      return true; // [r+i]
-    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
-      // Match LOAD (ADD (X, Lo(G))).
-      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
-             && "Cannot handle constant offsets yet!");
-      Disp = N.getOperand(1).getOperand(0);  // The global address.
-      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
-             Disp.getOpcode() == ISD::TargetConstantPool ||
-             Disp.getOpcode() == ISD::TargetJumpTable);
-      Base = N.getOperand(0);
-      return true;  // [&g+r]
-    }
-  } else if (N.getOpcode() == ISD::OR) {
-    short imm = 0;
-    if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
-      // If this is an or of disjoint bitfields, we can codegen this as an add
-      // (for better address arithmetic) if the LHS and RHS of the OR are
-      // provably disjoint.
-      APInt LHSKnownZero, LHSKnownOne;
-      DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
-      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
-        // If all of the bits are known zero on the LHS or RHS, the add won't
-        // carry.
-        Base = N.getOperand(0);
-        Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
-        return true;
-      }
-    }
-  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
-    // Loading from a constant address.  Verify low two bits are clear.
-    if ((CN->getZExtValue() & 3) == 0) {
-      // If this address fits entirely in a 14-bit sext immediate field, codegen
-      // this as "d, 0"
-      short Imm;
-      if (isIntS16Immediate(CN, Imm)) {
-        Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
-        Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
-                               CN->getValueType(0));
-        return true;
-      }
-
-      // Fold the low-part of 32-bit absolute addresses into addr mode.
-      if (CN->getValueType(0) == MVT::i32 ||
-          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
-        int Addr = (int)CN->getZExtValue();
-
-        // Otherwise, break this down into an LIS + disp.
-        Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);
-        Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
-        unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
-        Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base),0);
-        return true;
-      }
-    }
-  }
-
-  Disp = DAG.getTargetConstant(0, getPointerTy());
-  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
-    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
-  else
-    Base = N;
-  return true;      // [r+0]
-}
-
-
  /// getPreIndexedAddressParts - returns true by value, base pointer and
  /// offset pointer and addressing mode by reference if the node's address
  /// can be legally represented as pre-indexed load / store address.
@@ -1287,18 +1406,16 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
      return true;
    }
  
-  // LDU/STU use reg+imm*4, others use reg+imm.
+  // LDU/STU can only handle immediates that are a multiple of 4.
    if (VT != MVT::i64) {
-    // reg + imm
-    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
+    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
        return false;
    } else {
      // LDU/STU need an address with at least 4-byte alignment.
      if (Alignment < 4)
        return false;
  
-    // reg + imm * 4.
-    if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
+    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
        return false;
    }
  
@@ -1322,9 +1439,10 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
  /// GetLabelAccessInfo - Return true if we should reference labels using a
  /// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
  static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
-                               unsigned &LoOpFlags, const GlobalValue *GV = 0) {
-  HiOpFlags = PPCII::MO_HA16;
-  LoOpFlags = PPCII::MO_LO16;
+                               unsigned &LoOpFlags,
+                               const GlobalValue *GV = nullptr) {
+  HiOpFlags = PPCII::MO_HA;
+  LoOpFlags = PPCII::MO_LO;
  
    // Don't use the pic base if not in PIC relocation model.  Or if we are on a
    // non-darwin platform.  We don't support PIC on other platforms yet.
@@ -1354,7 +1472,7 @@ static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
                               SelectionDAG &DAG) {
    EVT PtrVT = HiPart.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
-  DebugLoc DL = HiPart.getDebugLoc();
+  SDLoc DL(HiPart);
  
    SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
    SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
@@ -1379,7 +1497,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
    // The actual address of the GlobalValue is stored in the TOC.
    if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
      SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
-    return DAG.getNode(PPCISD::TOC_ENTRY, CP->getDebugLoc(), MVT::i64, GA,
+    return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
                         DAG.getRegister(PPC::X2, MVT::i64));
    }
  
@@ -1400,7 +1518,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
    // The actual address of the GlobalValue is stored in the TOC.
    if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
      SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
-    return DAG.getNode(PPCISD::TOC_ENTRY, JT->getDebugLoc(), MVT::i64, GA,
+    return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
                         DAG.getRegister(PPC::X2, MVT::i64));
    }
  
@@ -1427,8 +1545,12 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
  SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  
+  // FIXME: TLS addresses currently use medium model code sequences,
+  // which is the most useful form.  Eventually support for small and
+  // large models could be added if users need it, at the cost of
+  // additional complexity.
    GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
-  DebugLoc dl = GA->getDebugLoc();
+  SDLoc dl(GA);
    const GlobalValue *GV = GA->getGlobal();
    EVT PtrVT = getPointerTy();
    bool is64bit = PPCSubTarget.isPPC64();
@@ -1437,26 +1559,29 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
  
    if (Model == TLSModel::LocalExec) {
      SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
-                                               PPCII::MO_TPREL16_HA);
+                                               PPCII::MO_TPREL_HA);
      SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
-                                               PPCII::MO_TPREL16_LO);
+                                               PPCII::MO_TPREL_LO);
      SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
                                       is64bit ? MVT::i64 : MVT::i32);
      SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
      return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
    }
  
-  if (!is64bit)
-    llvm_unreachable("only local-exec is currently supported for ppc32");
-
    if (Model == TLSModel::InitialExec) {
      SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
-    SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
-    SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
-                                     PtrVT, GOTReg, TGA);
+    SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+                                                PPCII::MO_TLS);
+    SDValue GOTPtr;
+    if (is64bit) {
+      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+      GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
+                           PtrVT, GOTReg, TGA);
+    } else
+      GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
      SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
-                                   PtrVT, TGA, TPOffsetHi);
-    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA);
+                                   PtrVT, TGA, GOTPtr);
+    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
    }
  
    if (Model == TLSModel::GeneralDynamic) {
@@ -1514,7 +1639,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
    EVT PtrVT = Op.getValueType();
    GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
-  DebugLoc DL = GSDN->getDebugLoc();
+  SDLoc DL(GSDN);
    const GlobalValue *GV = GSDN->getGlobal();
  
    // 64-bit SVR4 ABI code is always position-independent.
@@ -1545,7 +1670,28 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
  
  SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
+
+  if (Op.getValueType() == MVT::v2i64) {
+    // When the operands themselves are v2i64 values, we need to do something
+    // special because VSX has no underlying comparison operations for these.
+    if (Op.getOperand(0).getValueType() == MVT::v2i64) {
+      // Equality can be handled by casting to the legal type for Altivec
+      // comparisons; everything else needs to be expanded.
+      if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+        return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
+                 DAG.getSetCC(dl, MVT::v4i32,
+                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
+                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
+                   CC));
+      }
+
+      return SDValue();
+    }
+
+    // We handle most of these in the usual way.
+    return Op;
+  }
  
    // If we're comparing for equality to zero, expose the fact that this is
    // implented as a ctlz/srl pair on ppc, so that the dag combiner can
@@ -1594,7 +1740,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
    SDValue InChain = Node->getOperand(0);
    SDValue VAListPtr = Node->getOperand(1);
    const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
-  DebugLoc dl = Node->getDebugLoc();
+  SDLoc dl(Node);
  
    assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
  
@@ -1694,6 +1840,18 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
                       false, false, false, 0);
  }
  
+SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
+                                       const PPCSubtarget &Subtarget) const {
+  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
+
+  // We have to copy the entire va_list struct:
+  // 2*sizeof(char) + 2 bytes of alignment padding + 2*sizeof(char*) = 12 bytes
+  return DAG.getMemcpy(Op.getOperand(0), Op,
+                       Op.getOperand(1), Op.getOperand(2),
+                       DAG.getConstant(12, MVT::i32), 8, false, true,
+                       MachinePointerInfo(), MachinePointerInfo());
+}
+
  SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
                                                    SelectionDAG &DAG) const {
    return Op.getOperand(0);
@@ -1705,7 +1863,7 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
    SDValue Trmp = Op.getOperand(1); // trampoline
    SDValue FPtr = Op.getOperand(2); // nested function
    SDValue Nest = Op.getOperand(3); // 'nest' parameter value
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
  
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    bool isPPC64 = (PtrVT == MVT::i64);
@@ -1747,7 +1905,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
    MachineFunction &MF = DAG.getMachineFunction();
    PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
  
    if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
      // vastart just stores the address of the VarArgsFrameIndex slot into the
@@ -1841,19 +1999,25 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
  
  #include "PPCGenCallingConv.inc"
  
-static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                       CCValAssign::LocInfo &LocInfo,
-                                       ISD::ArgFlagsTy &ArgFlags,
-                                       CCState &State) {
+// This function exists solely to silence compiler warnings about unused
+// functions included from PPCGenCallingConv.inc.
+CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
+  return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
+}
+
+bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                      CCValAssign::LocInfo &LocInfo,
+                                      ISD::ArgFlagsTy &ArgFlags,
+                                      CCState &State) {
    return true;
  }
  
-static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
-                                              MVT &LocVT,
-                                              CCValAssign::LocInfo &LocInfo,
-                                              ISD::ArgFlagsTy &ArgFlags,
-                                              CCState &State) {
-  static const uint16_t ArgRegs[] = {
+bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+                                             MVT &LocVT,
+                                             CCValAssign::LocInfo &LocInfo,
+                                             ISD::ArgFlagsTy &ArgFlags,
+                                             CCState &State) {
+  static const MCPhysReg ArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
@@ -1875,12 +2039,12 @@ static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
    return false;
  }
  
-static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
-                                                MVT &LocVT,
-                                                CCValAssign::LocInfo &LocInfo,
-                                                ISD::ArgFlagsTy &ArgFlags,
-                                                CCState &State) {
-  static const uint16_t ArgRegs[] = {
+bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+                                               MVT &LocVT,
+                                               CCValAssign::LocInfo &LocInfo,
+                                               ISD::ArgFlagsTy &ArgFlags,
+                                               CCState &State) {
+  static const MCPhysReg ArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
@@ -1904,8 +2068,8 @@ static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
  
  /// GetFPR - Get the set of FP registers that should be allocated for arguments,
  /// on Darwin.
-static const uint16_t *GetFPR() {
-  static const uint16_t FPR[] = {
+static const MCPhysReg *GetFPR() {
+  static const MCPhysReg FPR[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
    };
@@ -1917,7 +2081,7 @@ static const uint16_t *GetFPR() {
  /// the stack.
  static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
                                         unsigned PtrByteSize) {
-  unsigned ArgSize = ArgVT.getSizeInBits()/8;
+  unsigned ArgSize = ArgVT.getStoreSize();
    if (Flags.isByVal())
      ArgSize = Flags.getByValSize();
    ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
@@ -1930,7 +2094,7 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain,
                                          CallingConv::ID CallConv, bool isVarArg,
                                          const SmallVectorImpl<ISD::InputArg>
                                            &Ins,
-                                        DebugLoc dl, SelectionDAG &DAG,
+                                        SDLoc dl, SelectionDAG &DAG,
                                          SmallVectorImpl<SDValue> &InVals)
                                            const {
    if (PPCSubTarget.isSVR4ABI()) {
@@ -1952,7 +2116,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
-                                      DebugLoc dl, SelectionDAG &DAG,
+                                      SDLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals) const {
  
    // 32-bit SVR4 ABI Stack Frame Layout:
@@ -2015,6 +2179,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
        switch (ValVT.getSimpleVT().SimpleTy) {
          default:
            llvm_unreachable("ValVT not supported by formal arguments Lowering");
+        case MVT::i1:
          case MVT::i32:
            RC = &PPC::GPRCRegClass;
            break;
@@ -2022,7 +2187,10 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
            RC = &PPC::F4RCRegClass;
            break;
          case MVT::f64:
-          RC = &PPC::F8RCRegClass;
+          if (PPCSubTarget.hasVSX())
+            RC = &PPC::VSFRCRegClass;
+          else
+            RC = &PPC::F8RCRegClass;
            break;
          case MVT::v16i8:
          case MVT::v8i16:
@@ -2030,18 +2198,26 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
          case MVT::v4f32:
            RC = &PPC::VRRCRegClass;
            break;
+        case MVT::v2f64:
+        case MVT::v2i64:
+          RC = &PPC::VSHRCRegClass;
+          break;
        }
  
        // Transform the arguments stored in physical registers into virtual ones.
        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
-      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);
+      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
+                                            ValVT == MVT::i1 ? MVT::i32 : ValVT);
+
+      if (ValVT == MVT::i1)
+        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
  
        InVals.push_back(ArgValue);
      } else {
        // Argument stored in memory.
        assert(VA.isMemLoc());
  
-      unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
+      unsigned ArgSize = VA.getLocVT().getStoreSize();
        int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
                                        isImmutable);
  
@@ -2090,13 +2266,13 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
    // If the function takes variable number of arguments, make a frame index for
    // the start of the first vararg value... for expansion of llvm.va_start.
    if (isVarArg) {
-    static const uint16_t GPArgRegs[] = {
+    static const MCPhysReg GPArgRegs[] = {
        PPC::R3, PPC::R4, PPC::R5, PPC::R6,
        PPC::R7, PPC::R8, PPC::R9, PPC::R10,
      };
      const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
  
-    static const uint16_t FPArgRegs[] = {
+    static const MCPhysReg FPArgRegs[] = {
        PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
        PPC::F8
      };
@@ -2158,8 +2334,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
    }
  
    if (!MemOps.empty())
-    Chain = DAG.getNode(ISD::TokenFactor, dl,
-                        MVT::Other, &MemOps[0], MemOps.size());
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
  
    return Chain;
  }
@@ -2169,15 +2344,15 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
  SDValue
  PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
                                       SelectionDAG &DAG, SDValue ArgVal,
-                                     DebugLoc dl) const {
+                                     SDLoc dl) const {
    if (Flags.isSExt())
      ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
                           DAG.getValueType(ObjectVT));
    else if (Flags.isZExt())
      ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
                           DAG.getValueType(ObjectVT));
-  
-  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
+
+  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
  }
  
  // Set the size that is at least reserved in caller of this function.  Tail
@@ -2212,7 +2387,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
-                                      DebugLoc dl, SelectionDAG &DAG,
+                                      SDLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals) const {
    // TODO: add description of PPC stack frame format, or at least some docs.
    //
@@ -2230,17 +2405,21 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
    // Area that is at least reserved in caller of this function.
    unsigned MinReservedArea = ArgOffset;
  
-  static const uint16_t GPR[] = {
+  static const MCPhysReg GPR[] = {
      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
      PPC::X7, PPC::X8, PPC::X9, PPC::X10,
    };
  
-  static const uint16_t *FPR = GetFPR();
+  static const MCPhysReg *FPR = GetFPR();
  
-  static const uint16_t VR[] = {
+  static const MCPhysReg VR[] = {
      PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
      PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
    };
+  static const MCPhysReg VSRH[] = {
+    PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
+    PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
+  };
  
    const unsigned Num_GPR_Regs = array_lengthof(GPR);
    const unsigned Num_FPR_Regs = 13;
@@ -2260,7 +2439,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
      SDValue ArgVal;
      bool needsLoad = false;
      EVT ObjectVT = Ins[ArgNo].VT;
-    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+    unsigned ObjSize = ObjectVT.getStoreSize();
      unsigned ArgSize = ObjSize;
      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
      std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
@@ -2270,7 +2449,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
  
      // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
      if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
-        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
+        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8 ||
+        ObjectVT==MVT::v2f64 || ObjectVT==MVT::v2i64) {
        if (isVarArg) {
          MinReservedArea = ((MinReservedArea+15)/16)*16;
          MinReservedArea += CalculateStackSlotSize(ObjectVT,
@@ -2303,6 +2483,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
          InVals.push_back(FIN);
          continue;
        }
+
+      unsigned BVAlign = Flags.getByValAlign();
+      if (BVAlign > 8) {
+        ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
+        CurArgOffset = ArgOffset;
+      }
+
        // All aggregates smaller than 8 bytes must be passed right-justified.
        if (ObjSize < PtrByteSize)
          CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
@@ -2321,7 +2508,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
              EVT ObjType = (ObjSize == 1 ? MVT::i8 :
                             (ObjSize == 2 ? MVT::i16 : MVT::i32));
              Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
-                                      MachinePointerInfo(FuncArg, CurArgOffset),
+                                      MachinePointerInfo(FuncArg),
                                        ObjType, false, false, 0);
            } else {
              // For sizes that don't fit a truncating store (3, 5, 6, 7),
@@ -2333,7 +2520,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
              int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
              SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
              Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
-                                 MachinePointerInfo(FuncArg, ArgOffset),
+                                 MachinePointerInfo(FuncArg),
                                   false, false, 0);
            }
  
@@ -2357,7 +2544,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
            SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
            SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
            SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
-                                       MachinePointerInfo(FuncArg, ArgOffset),
+                                       MachinePointerInfo(FuncArg, j),
                                         false, false, 0);
            MemOps.push_back(Store);
            ++GPR_idx;
@@ -2372,13 +2559,14 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
  
      switch (ObjectVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unhandled argument type!");
+    case MVT::i1:
      case MVT::i32:
      case MVT::i64:
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
  
-        if (ObjectVT == MVT::i32)
+        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
            // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
            // value to MVT::i64 and then truncate to the correct register size.
            ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
@@ -2404,7 +2592,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
          if (ObjectVT == MVT::f32)
            VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
          else
-          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
+          VReg = MF.addLiveIn(FPR[FPR_idx], PPCSubTarget.hasVSX() ?
+                                            &PPC::VSFRCRegClass :
+                                            &PPC::F8RCRegClass);
  
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
          ++FPR_idx;
@@ -2419,10 +2609,14 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
+    case MVT::v2f64:
+    case MVT::v2i64:
        // Note that vector arguments in registers don't reserve stack space,
        // except in varargs functions.
        if (VR_idx != Num_VR_Regs) {
-        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+        unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
+                        MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
+                        MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
          if (isVarArg) {
            while ((ArgOffset % 16) != 0) {
@@ -2489,8 +2683,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
    }
  
    if (!MemOps.empty())
-    Chain = DAG.getNode(ISD::TokenFactor, dl,
-                        MVT::Other, &MemOps[0], MemOps.size());
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
  
    return Chain;
  }
@@ -2501,7 +2694,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
-                                      DebugLoc dl, SelectionDAG &DAG,
+                                      SDLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals) const {
    // TODO: add description of PPC stack frame format, or at least some docs.
    //
@@ -2520,18 +2713,18 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
    // Area that is at least reserved in caller of this function.
    unsigned MinReservedArea = ArgOffset;
  
-  static const uint16_t GPR_32[] = {           // 32-bit registers.
+  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
-  static const uint16_t GPR_64[] = {           // 64-bit registers.
+  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
      PPC::X7, PPC::X8, PPC::X9, PPC::X10,
    };
  
-  static const uint16_t *FPR = GetFPR();
+  static const MCPhysReg *FPR = GetFPR();
  
-  static const uint16_t VR[] = {
+  static const MCPhysReg VR[] = {
      PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
      PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
    };
@@ -2542,7 +2735,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
  
    unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  
-  const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
+  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
  
    // In 32-bit non-varargs functions, the stack space for vectors is after the
    // stack space for non-vectors.  We do not use this space unless we have
@@ -2569,6 +2762,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
  
        switch(ObjectVT.getSimpleVT().SimpleTy) {
        default: llvm_unreachable("Unhandled argument type!");
+      case MVT::i1:
        case MVT::i32:
        case MVT::f32:
          VecArgOffset += 4;
@@ -2599,17 +2793,17 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
  
    SmallVector<SDValue, 8> MemOps;
    unsigned nAltivecParamsAtEnd = 0;
-  // FIXME: FuncArg and Ins[ArgNo] must reference the same argument.
-  // When passing anonymous aggregates, this is currently not true.
-  // See LowerFormalArguments_64SVR4 for a fix.
    Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
-  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
+  unsigned CurArgIdx = 0;
+  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
      SDValue ArgVal;
      bool needsLoad = false;
      EVT ObjectVT = Ins[ArgNo].VT;
      unsigned ObjSize = ObjectVT.getSizeInBits()/8;
      unsigned ArgSize = ObjSize;
      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+    std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
+    CurArgIdx = Ins[ArgNo].OrigArgIndex;
  
      unsigned CurArgOffset = ArgOffset;
  
@@ -2653,8 +2847,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
            SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
            EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
            SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
-                                            MachinePointerInfo(FuncArg,
-                                              CurArgOffset),
+                                            MachinePointerInfo(FuncArg),
                                              ObjType, false, false, 0);
            MemOps.push_back(Store);
            ++GPR_idx;
@@ -2678,7 +2871,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
            SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
            SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
            SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
-                                       MachinePointerInfo(FuncArg, ArgOffset),
+                                       MachinePointerInfo(FuncArg, j),
                                         false, false, 0);
            MemOps.push_back(Store);
            ++GPR_idx;
@@ -2693,11 +2886,16 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
  
      switch (ObjectVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unhandled argument type!");
+    case MVT::i1:
      case MVT::i32:
        if (!isPPC64) {
          if (GPR_idx != Num_GPR_Regs) {
            unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
            ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+
+          if (ObjectVT == MVT::i1)
+            ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
+
            ++GPR_idx;
          } else {
            needsLoad = true;
@@ -2713,7 +2911,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
  
-        if (ObjectVT == MVT::i32)
+        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
            // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
            // value to MVT::i64 and then truncate to the correct register size.
            ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
@@ -2840,8 +3038,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
    }
  
    if (!MemOps.empty())
-    Chain = DAG.getNode(ISD::TokenFactor, dl,
-                        MVT::Other, &MemOps[0], MemOps.size());
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
  
    return Chain;
  }
@@ -2876,7 +3073,8 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
      EVT ArgVT = Outs[i].VT;
      // Varargs Altivec parameters are padded to a 16 byte boundary.
      if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
-        ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
+        ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8 ||
+        ArgVT==MVT::v2f64 || ArgVT==MVT::v2i64) {
        if (!isVarArg && !isPPC64) {
          // Non-varargs Altivec parameters go after all the non-Altivec
          // parameters; handle those later so we know how much padding we need.
@@ -2956,7 +3154,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
         if (Flags.isByVal()) return false;
      }
  
-    // Non PIC/GOT  tail calls are supported.
+    // Non-PIC/GOT tail calls are supported.
      if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
        return true;
  
@@ -2974,12 +3172,12 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
  /// 32-bit value is representable in the immediate field of a BxA instruction.
  static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
-  if (!C) return 0;
+  if (!C) return nullptr;
  
    int Addr = C->getZExtValue();
    if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
        SignExtend32<26>(Addr) != Addr)
-    return 0;  // Top 6 bits have to be sext of immediate.
+    return nullptr;  // Top 6 bits have to be sext of immediate.
  
    return DAG.getConstant((int)C->getZExtValue() >> 2,
                           DAG.getTargetLoweringInfo().getPointerTy()).getNode();
@@ -3001,9 +3199,9 @@ struct TailCallArgumentInfo {
  static void
  StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
                                             SDValue Chain,
-                   const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
-                   SmallVector<SDValue, 8> &MemOpChains,
-                   DebugLoc dl) {
+                   const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
+                   SmallVectorImpl<SDValue> &MemOpChains,
+                   SDLoc dl) {
    for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
      SDValue Arg = TailCallArgs[i].Arg;
      SDValue FIN = TailCallArgs[i].FrameIdxOp;
@@ -3025,7 +3223,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
                                                 int SPDiff,
                                                 bool isPPC64,
                                                 bool isDarwinABI,
-                                               DebugLoc dl) {
+                                               SDLoc dl) {
    if (SPDiff) {
      // Calculate the new stack slot for the return address.
      int SlotSize = isPPC64 ? 8 : 4;
@@ -3060,7 +3258,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
  static void
  CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
                           SDValue Arg, int SPDiff, unsigned ArgOffset,
-                      SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
+                     SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
    int Offset = ArgOffset + SPDiff;
    uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
    int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
@@ -3082,7 +3280,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
                                                          SDValue &LROpOut,
                                                          SDValue &FPOpOut,
                                                          bool isDarwinABI,
-                                                        DebugLoc dl) const {
+                                                        SDLoc dl) const {
    if (SPDiff) {
      // Load the LR and FP stack slot for later adjusting.
      EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
@@ -3112,11 +3310,11 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
  static SDValue
  CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                            ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
-                          DebugLoc dl) {
+                          SDLoc dl) {
    SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
    return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
-                       false, false, MachinePointerInfo(0),
-                       MachinePointerInfo(0));
+                       false, false, MachinePointerInfo(),
+                       MachinePointerInfo());
  }
  
  /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
@@ -3125,9 +3323,9 @@ static void
  LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
                   SDValue Arg, SDValue PtrOff, int SPDiff,
                   unsigned ArgOffset, bool isPPC64, bool isTailCall,
-                 bool isVector, SmallVector<SDValue, 8> &MemOpChains,
-                 SmallVector<TailCallArgumentInfo, 8> &TailCallArguments,
-                 DebugLoc dl) {
+                 bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
+                 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
+                 SDLoc dl) {
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    if (!isTailCall) {
      if (isVector) {
@@ -3148,9 +3346,9 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
  
  static
  void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
-                     DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
+                     SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
                       SDValue LROp, SDValue FPOp, bool isDarwinABI,
-                     SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) {
+                     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
    MachineFunction &MF = DAG.getMachineFunction();
  
    // Emit a sequence of copyto/copyfrom virtual registers for arguments that
@@ -3161,8 +3359,7 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
    StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                      MemOpChains2, dl);
    if (!MemOpChains2.empty())
-    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                        &MemOpChains2[0], MemOpChains2.size());
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
  
    // Store the return address to the appropriate stack slot.
    Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
@@ -3170,15 +3367,15 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
  
    // Emit callseq_end just before tailcall node.
    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
-                             DAG.getIntPtrConstant(0, true), InFlag);
+                             DAG.getIntPtrConstant(0, true), InFlag, dl);
    InFlag = Chain.getValue(1);
  }
  
  static
  unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
-                     SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall,
-                     SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
-                     SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys,
+                     SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
+                     SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
+                     SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
                       const PPCSubtarget &PPCSubTarget) {
  
    bool isPPC64 = PPCSubTarget.isPPC64();
@@ -3276,8 +3473,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
        // Load the address of the function entry point from the function
        // descriptor.
        SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
-      SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps,
-                                        InFlag.getNode() ? 3 : 2);
+      SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs,
+                         ArrayRef<SDValue>(MTCTROps, InFlag.getNode() ? 3 : 2));
        Chain = LoadFuncPtr.getValue(1);
        InFlag = LoadFuncPtr.getValue(2);
  
@@ -3313,8 +3510,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
        MTCTROps[2] = InFlag;
      }
  
-    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
-                        2 + (InFlag.getNode() != 0));
+    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
+                        ArrayRef<SDValue>(MTCTROps, InFlag.getNode() ? 3 : 2));
      InFlag = Chain.getValue(1);
  
      NodeTys.clear();
@@ -3322,7 +3519,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
      NodeTys.push_back(MVT::Glue);
      Ops.push_back(Chain);
      CallOpc = PPCISD::BCTRL;
-    Callee.setNode(0);
+    Callee.setNode(nullptr);
      // Add use of X11 (holding environment pointer)
      if (isSVR4ABI && isPPC64)
        Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
@@ -3362,7 +3559,7 @@ SDValue
  PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                     CallingConv::ID CallConv, bool isVarArg,
                                     const SmallVectorImpl<ISD::InputArg> &Ins,
-                                   DebugLoc dl, SelectionDAG &DAG,
+                                   SDLoc dl, SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &InVals) const {
  
    SmallVector<CCValAssign, 16> RVLocs;
@@ -3405,7 +3602,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
  }
  
  SDValue
-PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
+PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
                                bool isTailCall, bool isVarArg,
                                SelectionDAG &DAG,
                                SmallVector<std::pair<unsigned, SDValue>, 8>
@@ -3450,7 +3647,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
              isa<ConstantSDNode>(Callee)) &&
      "Expecting an global address, external symbol, absolute value or register");
  
-    return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
+    return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
    }
  
    // Add a NOP immediately after the branch instruction when using the 64-bit
@@ -3475,13 +3672,15 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
        // from allocating it), resulting in an additional register being
        // allocated and an unnecessary move instruction being generated.
        needsTOCRestore = true;
-    } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) {
+    } else if ((CallOpc == PPCISD::CALL) &&
+               (!isLocalCall(Callee) ||
+                DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
        // Otherwise insert NOP for non-local calls.
        CallOpc = PPCISD::CALL_NOP;
      }
    }
  
-  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
    InFlag = Chain.getValue(1);
  
    if (needsTOCRestore) {
@@ -3492,7 +3691,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
  
    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                               DAG.getIntPtrConstant(BytesCalleePops, true),
-                             InFlag);
+                             InFlag, dl);
    if (!Ins.empty())
      InFlag = Chain.getValue(1);
  
@@ -3504,10 +3703,10 @@ SDValue
  PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                               SmallVectorImpl<SDValue> &InVals) const {
    SelectionDAG &DAG                     = CLI.DAG;
-  DebugLoc &dl                          = CLI.DL;
-  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
-  SmallVector<SDValue, 32> &OutVals     = CLI.OutVals;
-  SmallVector<ISD::InputArg, 32> &Ins   = CLI.Ins;
+  SDLoc &dl                             = CLI.DL;
+  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+  SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
+  SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
    SDValue Chain                         = CLI.Chain;
    SDValue Callee                        = CLI.Callee;
    bool &isTailCall                      = CLI.IsTailCall;
@@ -3518,6 +3717,10 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
      isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
                                                     Ins, DAG);
  
+  if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
+    report_fatal_error("failed to perform tail call elimination on a call "
+                       "site marked musttail");
+
    if (PPCSubTarget.isSVR4ABI()) {
      if (PPCSubTarget.isPPC64())
        return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
@@ -3541,7 +3744,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                                      const SmallVectorImpl<SDValue> &OutVals,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
-                                    DebugLoc dl, SelectionDAG &DAG,
+                                    SDLoc dl, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
    // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
    // of the 32-bit SVR4 ABI stack frame layout.
@@ -3598,7 +3801,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
          errs() << "Call operand #" << i << " has unhandled type "
               << EVT(ArgVT).getEVTString() << "\n";
  #endif
-        llvm_unreachable(0);
+        llvm_unreachable(nullptr);
        }
      }
    } else {
@@ -3627,7 +3830,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
  
    // Adjust the stack pointer for the new arguments...
    // These operations are automatically eliminated by the prolog/epilog pass
-  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                               dl);
    SDValue CallSeqStart = Chain;
  
    // Load the return address and frame pointer so it can be moved somewhere else
@@ -3678,7 +3882,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
  
        // This must go outside the CALLSEQ_START..END.
        SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
-                           CallSeqStart.getNode()->getOperand(1));
+                           CallSeqStart.getNode()->getOperand(1),
+                           SDLoc(MemcpyCall));
        DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                               NewCallSeqStart.getNode());
        Chain = CallSeqStart = NewCallSeqStart;
@@ -3690,6 +3895,9 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
      }
  
      if (VA.isRegLoc()) {
+      if (Arg.getValueType() == MVT::i1)
+        Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
+
        seenFloatArg |= VA.getLocVT().isFloatingPoint();
        // Put argument in a physical register.
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
@@ -3714,8 +3922,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
    }
  
    if (!MemOpChains.empty())
-    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                        &MemOpChains[0], MemOpChains.size());
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
  
    // Build a sequence of copy-to-reg nodes chained together with token chain
    // and flag operands which copy the outgoing args into the appropriate regs.
@@ -3733,7 +3940,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
      SDValue Ops[] = { Chain, InFlag };
  
      Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
-                        dl, VTs, Ops, InFlag.getNode() ? 2 : 1);
+                        dl, VTs,
+                        ArrayRef<SDValue>(Ops, InFlag.getNode() ? 2 : 1));
  
      InFlag = Chain.getValue(1);
    }
@@ -3754,13 +3962,14 @@ PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
                                                SDValue CallSeqStart,
                                                ISD::ArgFlagsTy Flags,
                                                SelectionDAG &DAG,
-                                              DebugLoc dl) const {
+                                              SDLoc dl) const {
    SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
                          CallSeqStart.getNode()->getOperand(0),
                          Flags, DAG, dl);
    // The MEMCPY must go outside the CALLSEQ_START..END.
    SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
-                             CallSeqStart.getNode()->getOperand(1));
+                             CallSeqStart.getNode()->getOperand(1),
+                             SDLoc(MemcpyCall));
    DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                           NewCallSeqStart.getNode());
    return NewCallSeqStart;
@@ -3773,7 +3982,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                                      const SmallVectorImpl<SDValue> &OutVals,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
-                                    DebugLoc dl, SelectionDAG &DAG,
+                                    SDLoc dl, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  
    unsigned NumOps = Outs.size();
@@ -3814,7 +4023,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
  
    // Adjust the stack pointer for the new arguments...
    // These operations are automatically eliminated by the prolog/epilog pass
-  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                               dl);
    SDValue CallSeqStart = Chain;
  
    // Load the return address and frame pointer so it can be moved somewhere else
@@ -3835,16 +4045,21 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
    unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
    unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  
-  static const uint16_t GPR[] = {
+  static const MCPhysReg GPR[] = {
      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
      PPC::X7, PPC::X8, PPC::X9, PPC::X10,
    };
-  static const uint16_t *FPR = GetFPR();
+  static const MCPhysReg *FPR = GetFPR();
  
-  static const uint16_t VR[] = {
+  static const MCPhysReg VR[] = {
      PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
      PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
    };
+  static const MCPhysReg VSRH[] = {
+    PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
+    PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
+  };
+
    const unsigned NumGPRs = array_lengthof(GPR);
    const unsigned NumFPRs = 13;
    const unsigned NumVRs  = array_lengthof(VR);
@@ -3866,7 +4081,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
  
      // Promote integers to 64-bit values.
-    if (Arg.getValueType() == MVT::i32) {
+    if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
        // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
        unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
        Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
@@ -3888,6 +4103,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
        if (Size == 0)
          continue;
  
+      unsigned BVAlign = Flags.getByValAlign();
+      if (BVAlign > 8) {
+        if (BVAlign % PtrByteSize != 0)
+          llvm_unreachable(
+            "ByVal alignment is not a multiple of the pointer size");
+
+        ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
+      }
+
        // All aggregates smaller than 8 bytes must be passed right-justified.
        if (Size==1 || Size==2 || Size==4) {
          EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
@@ -3939,7 +4163,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
          // register.
          // FIXME: The memcpy seems to produce pretty awful code for
          // small aggregates, particularly for packed ones.
-        // FIXME: It would be preferable to use the slot in the 
+        // FIXME: It would be preferable to use the slot in the
          // parameter save area instead of a new local variable.
          SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
          SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
@@ -3979,8 +4203,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
        continue;
      }
  
-    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+    switch (Arg.getSimpleValueType().SimpleTy) {
      default: llvm_unreachable("Unexpected ValueType for argument!");
+    case MVT::i1:
      case MVT::i32:
      case MVT::i64:
        if (GPR_idx != NumGPRs) {
@@ -4002,7 +4227,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
            // must be passed right-justified in the stack doubleword, and
            // in the GPR, if one is available.
            SDValue StoreOff;
-          if (Arg.getValueType().getSimpleVT().SimpleTy == MVT::f32) {
+          if (Arg.getSimpleValueType().SimpleTy == MVT::f32) {
              SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
              StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
            } else
@@ -4041,6 +4266,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
+    case MVT::v2f64:
+    case MVT::v2i64:
        if (isVarArg) {
          // These go aligned on the stack, or in the corresponding R registers
          // when within range.  The Darwin PPC ABI doc claims they also go in
@@ -4064,7 +4291,13 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                                       MachinePointerInfo(),
                                       false, false, false, 0);
            MemOpChains.push_back(Load.getValue(1));
-          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
+
+          unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
+                           Arg.getSimpleValueType() == MVT::v2i64) ?
+                          VSRH[VR_idx] : VR[VR_idx];
+          ++VR_idx;
+
+          RegsToPass.push_back(std::make_pair(VReg, Load));
          }
          ArgOffset += 16;
          for (unsigned i=0; i<16; i+=PtrByteSize) {
@@ -4084,7 +4317,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
        // stack space allocated at the end.
        if (VR_idx != NumVRs) {
          // Doesn't have GPR space allocated.
-        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
+        unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
+                         Arg.getSimpleValueType() == MVT::v2i64) ?
+                        VSRH[VR_idx] : VR[VR_idx];
+        ++VR_idx;
+
+        RegsToPass.push_back(std::make_pair(VReg, Arg));
        } else {
          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                           true, isTailCall, true, MemOpChains,
@@ -4096,8 +4334,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
    }
  
    if (!MemOpChains.empty())
-    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                        &MemOpChains[0], MemOpChains.size());
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
  
    // Check if this is an indirect call (MTCTR/BCTRL).
    // See PrepareCall() for more information about calls through function
@@ -4144,7 +4381,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                                      const SmallVectorImpl<SDValue> &OutVals,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
-                                    DebugLoc dl, SelectionDAG &DAG,
+                                    SDLoc dl, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  
    unsigned NumOps = Outs.size();
@@ -4185,7 +4422,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
  
    // Adjust the stack pointer for the new arguments...
    // These operations are automatically eliminated by the prolog/epilog pass
-  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                               dl);
    SDValue CallSeqStart = Chain;
  
    // Load the return address and frame pointer so it can be moved somewhere else
@@ -4210,17 +4448,17 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
    unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
    unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  
-  static const uint16_t GPR_32[] = {           // 32-bit registers.
+  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
-  static const uint16_t GPR_64[] = {           // 64-bit registers.
+  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
      PPC::X7, PPC::X8, PPC::X9, PPC::X10,
    };
-  static const uint16_t *FPR = GetFPR();
+  static const MCPhysReg *FPR = GetFPR();
  
-  static const uint16_t VR[] = {
+  static const MCPhysReg VR[] = {
      PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
      PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
    };
@@ -4228,7 +4466,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
    const unsigned NumFPRs = 13;
    const unsigned NumVRs  = array_lengthof(VR);
  
-  const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
+  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
  
    SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
    SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
@@ -4309,11 +4547,15 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
        continue;
      }
  
-    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+    switch (Arg.getSimpleValueType().SimpleTy) {
      default: llvm_unreachable("Unexpected ValueType for argument!");
+    case MVT::i1:
      case MVT::i32:
      case MVT::i64:
        if (GPR_idx != NumGPRs) {
+        if (Arg.getValueType() == MVT::i1)
+          Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
+
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
        } else {
          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
@@ -4454,8 +4696,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
    }
  
    if (!MemOpChains.empty())
-    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                        &MemOpChains[0], MemOpChains.size());
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
  
    // On Darwin, R12 must contain the address of an indirect callee.  This does
    // not mean the MTCTR instruction must use R12; it's easier to model this as
@@ -4501,7 +4742,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
                                 CallingConv::ID CallConv, bool isVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
-                               DebugLoc dl, SelectionDAG &DAG) const {
+                               SDLoc dl, SelectionDAG &DAG) const {
  
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
@@ -4543,14 +4784,13 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
    if (Flag.getNode())
      RetOps.push_back(Flag);
  
-  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other,
-                     &RetOps[0], RetOps.size());
+  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
  }
  
  SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
                                     const PPCSubtarget &Subtarget) const {
    // When we pop the dynamic allocation we need to restore the SP link.
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
  
    // Get the correct type for pointers.
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -4635,7 +4875,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
    // Get the inputs.
    SDValue Chain = Op.getOperand(0);
    SDValue Size  = Op.getOperand(1);
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
  
    // Get the correct type for pointers.
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -4647,12 +4887,12 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
    // Build a DYNALLOC node.
    SDValue Ops[3] = { Chain, NegSize, FPSIdx };
    SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
-  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
+  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
  }
  
  SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                                                 SelectionDAG &DAG) const {
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
    return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
                       DAG.getVTList(MVT::i32, MVT::Other),
                       Op.getOperand(0), Op.getOperand(1));
@@ -4660,11 +4900,60 @@ SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
  
  SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                                  SelectionDAG &DAG) const {
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
    return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
                       Op.getOperand(0), Op.getOperand(1));
  }
  
+SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+  assert(Op.getValueType() == MVT::i1 &&
+         "Custom lowering only for i1 loads");
+
+  // First, extending-load 8 bits to the pointer type, then truncate to i1.
+
+  SDLoc dl(Op);
+  LoadSDNode *LD = cast<LoadSDNode>(Op);
+
+  SDValue Chain = LD->getChain();
+  SDValue BasePtr = LD->getBasePtr();
+  MachineMemOperand *MMO = LD->getMemOperand();
+
+  SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain,
+                                 BasePtr, MVT::i8, MMO);
+  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
+
+  SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
+  return DAG.getMergeValues(Ops, dl);
+}
+
+SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
+         "Custom lowering only for i1 stores");
+
+  // First, zero extend to the pointer type, then truncating-store 8 bits.
+
+  SDLoc dl(Op);
+  StoreSDNode *ST = cast<StoreSDNode>(Op);
+
+  SDValue Chain = ST->getChain();
+  SDValue BasePtr = ST->getBasePtr();
+  SDValue Value = ST->getValue();
+  MachineMemOperand *MMO = ST->getMemOperand();
+
+  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value);
+  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
+}
+
+// FIXME: Remove this custom lowering once the ANDI glue bug is fixed.
+SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
+  assert(Op.getValueType() == MVT::i1 &&
+         "Custom lowering only for i1 results");
+
+  SDLoc DL(Op);
+  return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
+                     Op.getOperand(0));
+}
+
  /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
  /// possible.
  SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -4673,22 +4962,37 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
        !Op.getOperand(2).getValueType().isFloatingPoint())
      return Op;
  
-  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+  // We might be able to do better than this under some circumstances, but in
+  // general, fsel-based lowering of select is a finite-math-only optimization.
+  // For more information, see section F.3 of the 2.06 ISA specification.
+  if (!DAG.getTarget().Options.NoInfsFPMath ||
+      !DAG.getTarget().Options.NoNaNsFPMath)
+    return Op;
  
-  // Cannot handle SETEQ/SETNE.
-  if (CC == ISD::SETEQ || CC == ISD::SETNE) return Op;
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  
    EVT ResVT = Op.getValueType();
    EVT CmpVT = Op.getOperand(0).getValueType();
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
  
    // If the RHS of the comparison is a 0.0, we don't need to do the
    // subtraction at all.
+  SDValue Sel1;
    if (isFloatingPointZero(RHS))
      switch (CC) {
      default: break;       // SETUO etc aren't handled by fsel.
+    case ISD::SETNE:
+      std::swap(TV, FV);
+    case ISD::SETEQ:
+      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
+        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
+      if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
+        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
+      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
      case ISD::SETULT:
      case ISD::SETLT:
        std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
@@ -4711,44 +5015,55 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
    SDValue Cmp;
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
+  case ISD::SETNE:
+    std::swap(TV, FV);
+  case ISD::SETEQ:
+    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
+      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+    if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
+      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
+    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
    case ISD::SETULT:
    case ISD::SETLT:
      Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
      if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
        Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
-      return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
    case ISD::SETOGE:
    case ISD::SETGE:
      Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
      if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
        Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
-      return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
      if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
        Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
-      return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
    case ISD::SETOLE:
    case ISD::SETLE:
      Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
      if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
        Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
-      return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    }
    return Op;
  }
  
  // FIXME: Split this code up when LegalizeDAGTypes lands.
  SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
-                                           DebugLoc dl) const {
+                                           SDLoc dl) const {
    assert(Op.getOperand(0).getValueType().isFloatingPoint());
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType() == MVT::f32)
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
  
    SDValue Tmp;
-  switch (Op.getValueType().getSimpleVT().SimpleTy) {
+  switch (Op.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
    case MVT::i32:
      Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
@@ -4780,8 +5095,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
        MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
      SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
      Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
-              DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
-              MVT::i32, MMO);
+              DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
    } else
      Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
                           MPI, false, false, 0);
@@ -4800,11 +5114,16 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
  
  SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                             SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    // Don't handle ppc_fp128 here; let it be lowered to a libcall.
    if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
      return SDValue();
  
+  if (Op.getOperand(0).getValueType() == MVT::i1)
+    return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
+                       DAG.getConstantFP(1.0, Op.getValueType()),
+                       DAG.getConstantFP(0.0, Op.getValueType()));
+
    assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) &&
           "UINT_TO_FP is supported only with FPCVT");
  
@@ -4903,7 +5222,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
      Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
                                     PPCISD::LFIWZX : PPCISD::LFIWAX,
                                   dl, DAG.getVTList(MVT::f64, MVT::Other),
-                                 Ops, 2, MVT::i32, MMO);
+                                 Ops, MVT::i32, MMO);
    } else {
      assert(PPCSubTarget.isPPC64() &&
             "i32->FP without LFIWAX supported only on PPC64");
@@ -4934,7 +5253,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
  
  SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                              SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    /*
     The rounding mode is in bits 30:31 of FPSR, and has the following
     settings:
@@ -4957,14 +5276,13 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
    MachineFunction &MF = DAG.getMachineFunction();
    EVT VT = Op.getValueType();
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
-  SDValue MFFSreg, InFlag;
  
    // Save FP Control Word to register
    EVT NodeTys[] = {
      MVT::f64,    // return register
      MVT::Glue    // unused in this context
    };
-  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
+  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, ArrayRef<SDValue>());
  
    // Save FP register to stack slot
    int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
@@ -5000,7 +5318,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
  SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
    EVT VT = Op.getValueType();
    unsigned BitWidth = VT.getSizeInBits();
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    assert(Op.getNumOperands() == 3 &&
           VT == Op.getOperand(1).getValueType() &&
           "Unexpected SHL!");
@@ -5023,12 +5341,12 @@ SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
    SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
    SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
    SDValue OutOps[] = { OutLo, OutHi };
-  return DAG.getMergeValues(OutOps, 2, dl);
+  return DAG.getMergeValues(OutOps, dl);
  }
  
  SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
    EVT VT = Op.getValueType();
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    unsigned BitWidth = VT.getSizeInBits();
    assert(Op.getNumOperands() == 3 &&
           VT == Op.getOperand(1).getValueType() &&
@@ -5052,11 +5370,11 @@ SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
    SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
    SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
    SDValue OutOps[] = { OutLo, OutHi };
-  return DAG.getMergeValues(OutOps, 2, dl);
+  return DAG.getMergeValues(OutOps, dl);
  }
  
  SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    EVT VT = Op.getValueType();
    unsigned BitWidth = VT.getSizeInBits();
    assert(Op.getNumOperands() == 3 &&
@@ -5081,7 +5399,7 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
    SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
                                    Tmp4, Tmp6, ISD::SETLE);
    SDValue OutOps[] = { OutLo, OutHi };
-  return DAG.getMergeValues(OutOps, 2, dl);
+  return DAG.getMergeValues(OutOps, dl);
  }
  
  //===----------------------------------------------------------------------===//
@@ -5091,7 +5409,7 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
  /// BuildSplatI - Build a canonical splati of Val with an element size of
  /// SplatSize.  Cast the result to VT.
  static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
-                             SelectionDAG &DAG, DebugLoc dl) {
+                             SelectionDAG &DAG, SDLoc dl) {
    assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
  
    static const EVT VTys[] = { // canonical VT to use for each size.
@@ -5110,15 +5428,24 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
    SDValue Elt = DAG.getConstant(Val, MVT::i32);
    SmallVector<SDValue, 8> Ops;
    Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
-  SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT,
-                              &Ops[0], Ops.size());
+  SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops);
    return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
  }
  
+/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
+/// specified intrinsic ID.  DestVT defaults to the operand's value type.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op,
+                                SelectionDAG &DAG, SDLoc dl,
+                                EVT DestVT = MVT::Other) {
+  if (DestVT == MVT::Other) DestVT = Op.getValueType();
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+                     DAG.getConstant(IID, MVT::i32), Op);
+}
+
  /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
  /// specified intrinsic ID.
  static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
-                                SelectionDAG &DAG, DebugLoc dl,
+                                SelectionDAG &DAG, SDLoc dl,
                                  EVT DestVT = MVT::Other) {
    if (DestVT == MVT::Other) DestVT = LHS.getValueType();
    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
@@ -5129,7 +5456,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
  /// specified intrinsic ID.
  static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
                                  SDValue Op2, SelectionDAG &DAG,
-                                DebugLoc dl, EVT DestVT = MVT::Other) {
+                                SDLoc dl, EVT DestVT = MVT::Other) {
    if (DestVT == MVT::Other) DestVT = Op0.getValueType();
    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                       DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
@@ -5139,7 +5466,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
  /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
  /// amount.  The result has the specified value type.
  static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
-                             EVT VT, SelectionDAG &DAG, DebugLoc dl) {
+                             EVT VT, SelectionDAG &DAG, SDLoc dl) {
    // Force LHS/RHS to be the right type.
    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
@@ -5158,9 +5485,9 @@ static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
  // sequence of ops that should be used.
  SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                               SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
-  assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
+  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
  
    // Check if this is a splat of a constant value.
    APInt APSplatBits, APSplatUndef;
@@ -5314,7 +5641,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
  /// the specified operations to build the shuffle.
  static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                        SDValue RHS, SelectionDAG &DAG,
-                                      DebugLoc dl) {
+                                      SDLoc dl) {
    unsigned OpNum = (PFEntry >> 26) & 0x0F;
    unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
    unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
@@ -5393,7 +5720,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
  /// lowered into a vperm.
  SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                                 SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    SDValue V1 = Op.getOperand(0);
    SDValue V2 = Op.getOperand(1);
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
@@ -5506,7 +5833,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
    }
  
    SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
-                                    &ResultMask[0], ResultMask.size());
+                                  ResultMask);
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
  }
  
@@ -5560,7 +5887,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
    // If this is a lowered altivec predicate compare, CompareOpc is set to the
    // opcode number of the comparison.
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    int CompareOpc;
    bool isDot;
    if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
@@ -5581,11 +5908,11 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
      DAG.getConstant(CompareOpc, MVT::i32)
    };
    EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
-  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
+  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
  
    // Now that we have the comparison, emit a copy from the CR to a GPR.
    // This is flagged to the above dot comparison.
-  SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32,
+  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
                                  DAG.getRegister(PPC::CR6, MVT::i32),
                                  CompNode.getValue(1));
  
@@ -5622,9 +5949,33 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
    return Flags;
  }
  
+SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+  // For v2i64 (VSX), we can pattern match the v2i32 case (using fp <-> int
+  // instructions), but for smaller types, we need to first extend up to v2i32
+  // before going farther.
+  if (Op.getValueType() == MVT::v2i64) {
+    EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    if (ExtVT != MVT::v2i32) {
+      Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
+      Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
+                       DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
+                                        ExtVT.getVectorElementType(), 4)));
+      Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
+      Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
+                       DAG.getValueType(MVT::v2i32));
+    }
+
+    return Op;
+  }
+
+  return SDValue();
+}
+
  SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                     SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    // Create a stack slot that is 16-byte aligned.
    MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
    int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
@@ -5641,7 +5992,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
  }
  
  SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    if (Op.getValueType() == MVT::v4i32) {
      SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  
@@ -5718,6 +6069,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
    case ISD::VAARG:
      return LowerVAARG(Op, DAG, PPCSubTarget);
  
+  case ISD::VACOPY:
+    return LowerVACOPY(Op, DAG, PPCSubTarget);
+
    case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
    case ISD::DYNAMIC_STACKALLOC:
      return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
@@ -5725,10 +6079,13 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
    case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
    case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
  
+  case ISD::LOAD:               return LowerLOAD(Op, DAG);
+  case ISD::STORE:              return LowerSTORE(Op, DAG);
+  case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
    case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
    case ISD::FP_TO_UINT:
    case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
-                                                       Op.getDebugLoc());
+                                                       SDLoc(Op));
    case ISD::UINT_TO_FP:
    case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
    case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
@@ -5743,8 +6100,12 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
    case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
    case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
    case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
+  case ISD::SIGN_EXTEND_INREG:  return LowerSIGN_EXTEND_INREG(Op, DAG);
    case ISD::MUL:                return LowerMUL(Op, DAG);
  
+  // For counter-based loop handling.
+  case ISD::INTRINSIC_W_CHAIN:  return SDValue();
+
    // Frame & Return address.
    case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
    case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
@@ -5755,10 +6116,26 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                             SmallVectorImpl<SDValue>&Results,
                                             SelectionDAG &DAG) const {
    const TargetMachine &TM = getTargetMachine();
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
    switch (N->getOpcode()) {
    default:
      llvm_unreachable("Do not know how to custom type legalize this operation!");
+  case ISD::INTRINSIC_W_CHAIN: {
+    if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
+        Intrinsic::ppc_is_decremented_ctr_nonzero)
+      break;
+
+    assert(N->getValueType(0) == MVT::i1 &&
+           "Unexpected result type for CTR decrement intrinsic");
+    EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0));
+    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
+    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
+                                 N->getOperand(1)); 
+
+    Results.push_back(NewInt);
+    Results.push_back(NewInt.getValue(1));
+    break;
+  }
    case ISD::VAARG: {
      if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
          || TM.getSubtarget<PPCSubtarget>().isPPC64())
@@ -5794,6 +6171,9 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
      return;
    }
    case ISD::FP_TO_SINT:
+    // LowerFP_TO_INT() can only handle f32 and f64.
+    if (N->getOperand(0).getValueType() == MVT::ppcf128)
+      return;
      Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
      return;
    }
@@ -5826,8 +6206,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
    F->insert(It, loopMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
-                  llvm::next(MachineBasicBlock::iterator(MI)),
-                  BB->end());
+                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
  
    MachineRegisterInfo &RegInfo = F->getRegInfo();
@@ -5895,8 +6274,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
    F->insert(It, loopMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
-                  llvm::next(MachineBasicBlock::iterator(MI)),
-                  BB->end());
+                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
  
    MachineRegisterInfo &RegInfo = F->getRegInfo();
@@ -6048,7 +6426,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
  
    // Transfer the remainder of BB and its successor edges to sinkMBB.
    sinkMBB->splice(sinkMBB->begin(), MBB,
-                  llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
  
    // Note that the structure of the jmp_buf used here is not compatible
@@ -6065,6 +6443,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
    // thisMBB:
    const int64_t LabelOffset = 1 * PVT.getStoreSize();
    const int64_t TOCOffset   = 3 * PVT.getStoreSize();
+  const int64_t BPOffset    = 4 * PVT.getStoreSize();
  
    // Prepare IP either in reg.
    const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
@@ -6074,15 +6453,32 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
    if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) {
      MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
              .addReg(PPC::X2)
-            .addImm(TOCOffset / 4)
+            .addImm(TOCOffset)
              .addReg(BufReg);
-
      MIB.setMemRefs(MMOBegin, MMOEnd);
    }
  
+  // Naked functions never have a base pointer, and so we use r1. For all
+  // other functions, this decision must be delayed until during PEI.
+  unsigned BaseReg;
+  if (MF->getFunction()->getAttributes().hasAttribute(
+          AttributeSet::FunctionIndex, Attribute::Naked))
+    BaseReg = PPCSubTarget.isPPC64() ? PPC::X1 : PPC::R1;
+  else
+    BaseReg = PPCSubTarget.isPPC64() ? PPC::BP8 : PPC::BP;
+
+  MIB = BuildMI(*thisMBB, MI, DL,
+                TII->get(PPCSubTarget.isPPC64() ? PPC::STD : PPC::STW))
+          .addReg(BaseReg)
+          .addImm(BPOffset)
+          .addReg(BufReg);
+  MIB.setMemRefs(MMOBegin, MMOEnd);
+
    // Setup
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
-  MIB.addRegMask(PPCRegInfo->getNoPreservedMask());
+  const PPCRegisterInfo *TRI =
+    static_cast<const PPCRegisterInfo*>(getTargetMachine().getRegisterInfo());
+  MIB.addRegMask(TRI->getNoPreservedMask());
  
    BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
  
@@ -6102,7 +6498,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
    if (PPCSubTarget.isPPC64()) {
      MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
              .addReg(LabelReg)
-            .addImm(LabelOffset / 4)
+            .addImm(LabelOffset)
              .addReg(BufReg);
    } else {
      MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
@@ -6149,12 +6545,14 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
    // Since FP is only updated here but NOT referenced, it's treated as GPR.
    unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
    unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
+  unsigned BP  = (PVT == MVT::i64) ? PPC::X30 : PPC::R30;
  
    MachineInstrBuilder MIB;
  
    const int64_t LabelOffset = 1 * PVT.getStoreSize();
    const int64_t SPOffset    = 2 * PVT.getStoreSize();
    const int64_t TOCOffset   = 3 * PVT.getStoreSize();
+  const int64_t BPOffset    = 4 * PVT.getStoreSize();
  
    unsigned BufReg = MI->getOperand(0).getReg();
  
@@ -6175,7 +6573,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
    // Reload IP
    if (PVT == MVT::i64) {
      MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
-            .addImm(LabelOffset / 4)
+            .addImm(LabelOffset)
              .addReg(BufReg);
    } else {
      MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
@@ -6187,7 +6585,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
    // Reload SP
    if (PVT == MVT::i64) {
      MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
-            .addImm(SPOffset / 4)
+            .addImm(SPOffset)
              .addReg(BufReg);
    } else {
      MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
@@ -6196,13 +6594,22 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
    }
    MIB.setMemRefs(MMOBegin, MMOEnd);
  
-  // FIXME: When we also support base pointers, that register must also be
-  // restored here.
+  // Reload BP
+  if (PVT == MVT::i64) {
+    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
+            .addImm(BPOffset)
+            .addReg(BufReg);
+  } else {
+    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
+            .addImm(BPOffset)
+            .addReg(BufReg);
+  }
+  MIB.setMemRefs(MMOBegin, MMOEnd);
  
    // Reload TOC
    if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
      MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
-            .addImm(TOCOffset / 4)
+            .addImm(TOCOffset)
              .addReg(BufReg);
  
      MIB.setMemRefs(MMOBegin, MMOEnd);
@@ -6239,21 +6646,32 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
    MachineFunction *F = BB->getParent();
  
    if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
-                                 MI->getOpcode() == PPC::SELECT_CC_I8)) {
+                                 MI->getOpcode() == PPC::SELECT_CC_I8 ||
+                                 MI->getOpcode() == PPC::SELECT_I4 ||
+                                 MI->getOpcode() == PPC::SELECT_I8)) {
      SmallVector<MachineOperand, 2> Cond;
-    Cond.push_back(MI->getOperand(4));
+    if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+        MI->getOpcode() == PPC::SELECT_CC_I8)
+      Cond.push_back(MI->getOperand(4));
+    else
+      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
      Cond.push_back(MI->getOperand(1));
  
      DebugLoc dl = MI->getDebugLoc();
-    PPCII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), Cond,
-                        MI->getOperand(2).getReg(), MI->getOperand(3).getReg());
+    const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+    TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
+                      Cond, MI->getOperand(2).getReg(),
+                      MI->getOperand(3).getReg());
    } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
               MI->getOpcode() == PPC::SELECT_CC_I8 ||
               MI->getOpcode() == PPC::SELECT_CC_F4 ||
               MI->getOpcode() == PPC::SELECT_CC_F8 ||
-             MI->getOpcode() == PPC::SELECT_CC_VRRC) {
-
-
+             MI->getOpcode() == PPC::SELECT_CC_VRRC ||
+             MI->getOpcode() == PPC::SELECT_I4 ||
+             MI->getOpcode() == PPC::SELECT_I8 ||
+             MI->getOpcode() == PPC::SELECT_F4 ||
+             MI->getOpcode() == PPC::SELECT_F8 ||
+             MI->getOpcode() == PPC::SELECT_VRRC) {
      // The incoming instruction knows the destination vreg to set, the
      // condition code register to branch on, the true/false values to
      // select between, and a branch opcode to use.
@@ -6267,23 +6685,31 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
      MachineBasicBlock *thisMBB = BB;
      MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
      MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
-    unsigned SelectPred = MI->getOperand(4).getImm();
      DebugLoc dl = MI->getDebugLoc();
      F->insert(It, copy0MBB);
      F->insert(It, sinkMBB);
  
      // Transfer the remainder of BB and its successor edges to sinkMBB.
      sinkMBB->splice(sinkMBB->begin(), BB,
-                    llvm::next(MachineBasicBlock::iterator(MI)),
-                    BB->end());
+                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
      sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
  
      // Next, add the true and fallthrough blocks as its successors.
      BB->addSuccessor(copy0MBB);
      BB->addSuccessor(sinkMBB);
  
-    BuildMI(BB, dl, TII->get(PPC::BCC))
-      .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+    if (MI->getOpcode() == PPC::SELECT_I4 ||
+        MI->getOpcode() == PPC::SELECT_I8 ||
+        MI->getOpcode() == PPC::SELECT_F4 ||
+        MI->getOpcode() == PPC::SELECT_F8 ||
+        MI->getOpcode() == PPC::SELECT_VRRC) {
+      BuildMI(BB, dl, TII->get(PPC::BC))
+        .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+    } else {
+      unsigned SelectPred = MI->getOperand(4).getImm();
+      BuildMI(BB, dl, TII->get(PPC::BCC))
+        .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+    }
  
      //  copy0MBB:
      //   %FalseValue = ...
@@ -6385,8 +6811,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
      F->insert(It, midMBB);
      F->insert(It, exitMBB);
      exitMBB->splice(exitMBB->begin(), BB,
-                    llvm::next(MachineBasicBlock::iterator(MI)),
-                    BB->end());
+                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
      exitMBB->transferSuccessorsAndUpdatePHIs(BB);
  
      //  thisMBB:
@@ -6456,8 +6881,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
      F->insert(It, midMBB);
      F->insert(It, exitMBB);
      exitMBB->splice(exitMBB->begin(), BB,
-                    llvm::next(MachineBasicBlock::iterator(MI)),
-                    BB->end());
+                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
      exitMBB->transferSuccessorsAndUpdatePHIs(BB);
  
      MachineRegisterInfo &RegInfo = F->getRegInfo();
@@ -6606,51 +7030,27 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
  
      // Restore FPSCR value.
      BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
-  } else if (MI->getOpcode() == PPC::FRINDrint ||
-             MI->getOpcode() == PPC::FRINSrint) {
-    bool isf32 = MI->getOpcode() == PPC::FRINSrint;
-    unsigned Dest = MI->getOperand(0).getReg();
-    unsigned Src = MI->getOperand(1).getReg();
-    DebugLoc dl   = MI->getDebugLoc();
+  } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
+             MI->getOpcode() == PPC::ANDIo_1_GT_BIT ||
+             MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
+             MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) {
+    unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
+                       MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ?
+                      PPC::ANDIo8 : PPC::ANDIo;
+    bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
+                 MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8);
  
      MachineRegisterInfo &RegInfo = F->getRegInfo();
-    unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+    unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
+                                                  &PPC::GPRCRegClass :
+                                                  &PPC::G8RCRegClass);
  
-    // Perform the rounding.
-    BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest)
-      .addReg(Src);
-
-    // Compare the results.
-    BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg)
-      .addReg(Dest).addReg(Src);
-
-    // If the results were not equal, then set the FPSCR XX bit.
-    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
-    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
-    F->insert(It, midMBB);
-    F->insert(It, exitMBB);
-    exitMBB->splice(exitMBB->begin(), BB,
-                    llvm::next(MachineBasicBlock::iterator(MI)),
-                    BB->end());
-    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
-    BuildMI(*BB, MI, dl, TII->get(PPC::BCC))
-      .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB);
-
-    BB->addSuccessor(midMBB);
-    BB->addSuccessor(exitMBB);
-
-    BB = midMBB;
-
-    // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set
-    // the FI bit here because that will not automatically set XX also,
-    // and XX is what libm interprets as the FE_INEXACT flag.
-    BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6);
-    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
-
-    BB->addSuccessor(exitMBB);
-
-    BB = exitMBB;
+    DebugLoc dl   = MI->getDebugLoc();
+    BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
+      .addReg(MI->getOperand(1).getReg()).addImm(1);
+    BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
+            MI->getOperand(0).getReg())
+      .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
    } else {
      llvm_unreachable("Unexpected instr type to insert");
    }
@@ -6672,7 +7072,8 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
  
    if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) ||
        (VT == MVT::f64 && PPCSubTarget.hasFRE())  ||
-      (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+      (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) ||
+      (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) {
  
      // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
      // For the reciprocal, we need to find the zero of the function:
@@ -6690,7 +7091,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
        ++Iterations;
  
      SelectionDAG &DAG = DCI.DAG;
-    DebugLoc dl = Op.getDebugLoc();
+    SDLoc dl(Op);
  
      SDValue FPOne =
        DAG.getConstantFP(1.0, VT.getScalarType());
@@ -6734,7 +7135,8 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
  
    if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) ||
        (VT == MVT::f64 && PPCSubTarget.hasFRSQRTE())  ||
-      (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {
+      (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) ||
+      (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) {
  
      // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
      // For the reciprocal sqrt, we need to find the zero of the function:
@@ -6752,7 +7154,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
        ++Iterations;
  
      SelectionDAG &DAG = DCI.DAG;
-    DebugLoc dl = Op.getDebugLoc();
+    SDLoc dl(Op);
  
      SDValue FPThreeHalves =
        DAG.getConstantFP(1.5, VT.getScalarType());
@@ -6796,11 +7198,648 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
    return SDValue();
  }
  
+// Like SelectionDAG::isConsecutiveLoad, but also works for stores and ignores
+// chain operands: is LS a Bytes-sized access at Base's address + Dist*Bytes?
+static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
+                            unsigned Bytes, int Dist,
+                            SelectionDAG &DAG) {
+  EVT VT = LS->getMemoryVT();
+  if (VT.getSizeInBits() / 8 != Bytes)  // Memory type must be Bytes wide.
+    return false;
+
+  SDValue Loc = LS->getBasePtr();
+  SDValue BaseLoc = Base->getBasePtr();
+  if (Loc.getOpcode() == ISD::FrameIndex) {  // Handle two frame objects.
+    if (BaseLoc.getOpcode() != ISD::FrameIndex)
+      return false;
+    const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
+    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+    int FS  = MFI->getObjectSize(FI);
+    int BFS = MFI->getObjectSize(BFI);
+    if (FS != BFS || FS != (int)Bytes) return false;
+    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+  }
+
+  // Handle X+C. NOTE(review): Dist*Bytes is unsigned; Dist < 0 looks unsafe.
+  if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
+      cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
+    return true;
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  const GlobalValue *GV1 = nullptr;
+  const GlobalValue *GV2 = nullptr;
+  int64_t Offset1 = 0;
+  int64_t Offset2 = 0;
+  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+  if (isGA1 && isGA2 && GV1 == GV2)  // Same global: compare the offsets.
+    return Offset1 == (Offset2 + Dist*Bytes);
+  return false;  // None of the recognized address forms matched.
+}
+
+// Return true if there is a nearby consecutive load to the one provided
+// (regardless of alignment). We search up and down the chain, looking through
+// token factors and other loads (but nothing else). As a result, a true
+// result indicates that it is safe to create a new consecutive load adjacent
+// to the load provided.
+static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
+  SDValue Chain = LD->getChain();
+  EVT VT = LD->getMemoryVT();
+
+  SmallSet<SDNode *, 16> LoadRoots;
+  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
+  SmallSet<SDNode *, 16> Visited;
+
+  // First, search up the chain, branching to follow all token-factor operands.
+  // If we find a consecutive load, then we're done, otherwise, record all
+  // nodes just above the top-level loads and token factors.
+  while (!Queue.empty()) {
+    SDNode *ChainNext = Queue.pop_back_val();
+    if (!Visited.insert(ChainNext))  // Skip nodes already seen.
+      continue;
+
+    if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) {
+      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
+        return true;
+
+      if (!Visited.count(ChainLD->getChain().getNode()))
+        Queue.push_back(ChainLD->getChain().getNode());
+    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
+      for (SDNode::op_iterator O = ChainNext->op_begin(),
+           OE = ChainNext->op_end(); O != OE; ++O)
+        if (!Visited.count(O->getNode()))
+          Queue.push_back(O->getNode());
+    } else  // Neither a load nor a token factor: the upward walk stops here.
+      LoadRoots.insert(ChainNext);
+  }
+
+  // Second, search down the chain, starting from the top-level nodes recorded
+  // in the first phase. These top-level nodes are the nodes just above all
+  // loads and token factors. Starting with their uses, recursively look through
+  // all loads (just the chain uses) and token factors to find a consecutive
+  // load.
+  Visited.clear();
+  Queue.clear();
+
+  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
+       IE = LoadRoots.end(); I != IE; ++I) {
+    Queue.push_back(*I);
+       
+    while (!Queue.empty()) {
+      SDNode *LoadRoot = Queue.pop_back_val();
+      if (!Visited.insert(LoadRoot))  // Skip nodes already seen.
+        continue;
+
+      if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot))
+        if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
+          return true;
+
+      for (SDNode::use_iterator UI = LoadRoot->use_begin(),
+           UE = LoadRoot->use_end(); UI != UE; ++UI)
+        if (((isa<LoadSDNode>(*UI) &&
+            cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
+            UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
+          Queue.push_back(*UI);
+    }
+  }
+
+  return false;  // Neither search found a consecutive load.
+}
+
+SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
+                                                  DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc dl(N);
+
+  assert(PPCSubTarget.useCRBits() &&
+         "Expecting to be tracking CR bits");
+  // If we're tracking CR bits, we need to be careful that we don't have:
+  //   trunc(binary-ops(zext(x), zext(y)))
+  // or
+  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...))
+  // such that we're unnecessarily moving things into GPRs when it would be
+  // better to keep them in CR bits.
+
+  // Note that trunc here can be an actual i1 trunc, or can be the effective
+  // truncation that comes from a setcc or select_cc.
+  if (N->getOpcode() == ISD::TRUNCATE &&
+      N->getValueType(0) != MVT::i1)
+    return SDValue();
+
+  if (N->getOperand(0).getValueType() != MVT::i32 &&
+      N->getOperand(0).getValueType() != MVT::i64)
+    return SDValue();
+
+  if (N->getOpcode() == ISD::SETCC ||
+      N->getOpcode() == ISD::SELECT_CC) {
+    // If we're looking at a comparison, then we need to make sure that the
+    // high bits (all except for the first) don't affect the result.
+    ISD::CondCode CC =
+      cast<CondCodeSDNode>(N->getOperand(
+        N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
+    unsigned OpBits = N->getOperand(0).getValueSizeInBits();
+
+    if (ISD::isSignedIntSetCC(CC)) {
+      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
+          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
+        return SDValue();
+    } else if (ISD::isUnsignedIntSetCC(CC)) {
+      if (!DAG.MaskedValueIsZero(N->getOperand(0),
+                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
+          !DAG.MaskedValueIsZero(N->getOperand(1),
+                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
+        return SDValue();
+    } else {
+      // This is neither a signed nor an unsigned comparison, just make sure
+      // that the high bits are equal.
+      APInt Op1Zero, Op1One;
+      APInt Op2Zero, Op2One;
+      DAG.ComputeMaskedBits(N->getOperand(0), Op1Zero, Op1One);
+      DAG.ComputeMaskedBits(N->getOperand(1), Op2Zero, Op2One);
+
+      // We don't really care about what is known about the first bit (if
+      // anything), so clear it in all masks prior to comparing them.
+      Op1Zero.clearBit(0); Op1One.clearBit(0);
+      Op2Zero.clearBit(0); Op2One.clearBit(0);
+
+      if (Op1Zero != Op2Zero || Op1One != Op2One)
+        return SDValue();
+    }
+  }
+
+  // We now know that the higher-order bits are irrelevant, we just need to
+  // make sure that all of the intermediate operations are bit operations, and
+  // all inputs are extensions.
+  if (N->getOperand(0).getOpcode() != ISD::AND &&
+      N->getOperand(0).getOpcode() != ISD::OR  &&
+      N->getOperand(0).getOpcode() != ISD::XOR &&
+      N->getOperand(0).getOpcode() != ISD::SELECT &&
+      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
+      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
+      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
+      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
+      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
+    return SDValue();
+
+  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
+      N->getOperand(1).getOpcode() != ISD::AND &&
+      N->getOperand(1).getOpcode() != ISD::OR  &&
+      N->getOperand(1).getOpcode() != ISD::XOR &&
+      N->getOperand(1).getOpcode() != ISD::SELECT &&
+      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
+      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
+      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
+      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
+      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
+    return SDValue();
+
+  SmallVector<SDValue, 4> Inputs;
+  SmallVector<SDValue, 8> BinOps, PromOps;
+  SmallPtrSet<SDNode *, 16> Visited;
+
+  for (unsigned i = 0; i < 2; ++i) {
+    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
+          N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
+        isa<ConstantSDNode>(N->getOperand(i)))
+      Inputs.push_back(N->getOperand(i));
+    else
+      BinOps.push_back(N->getOperand(i));
+
+    if (N->getOpcode() == ISD::TRUNCATE)
+      break;  // Only operand 0 is examined for a truncation.
+  }
+
+  // Visit all inputs, collect all binary operations (and, or, xor and
+  // select) that are all fed by extensions. 
+  while (!BinOps.empty()) {
+    SDValue BinOp = BinOps.back();
+    BinOps.pop_back();
+
+    if (!Visited.insert(BinOp.getNode()))
+      continue;
+
+    PromOps.push_back(BinOp);
+
+    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
+      // The condition of the select is not promoted.
+      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
+        continue;
+      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
+        continue;
+
+      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
+           BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
+          isa<ConstantSDNode>(BinOp.getOperand(i))) {
+        Inputs.push_back(BinOp.getOperand(i)); 
+      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
+                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
+                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
+                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
+                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
+                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
+                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+                 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
+        BinOps.push_back(BinOp.getOperand(i));
+      } else {
+        // We have an input that is not an extension or another binary
+        // operation; we'll abort this transformation.
+        return SDValue();
+      }
+    }
+  }
+
+  // Make sure that this is a self-contained cluster of operations (which
+  // is not quite the same thing as saying that everything has only one
+  // use).
+  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+    if (isa<ConstantSDNode>(Inputs[i]))
+      continue;
+
+    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
+                              UE = Inputs[i].getNode()->use_end();
+         UI != UE; ++UI) {
+      SDNode *User = *UI;
+      if (User != N && !Visited.count(User))
+        return SDValue();
+
+      // Make sure that we're not going to promote the non-output-value
+      // operand(s) of SELECT or SELECT_CC.
+      // FIXME: Although we could sometimes handle this, and it does occur in
+      // practice that one of the condition inputs to the select is also one of
+      // the outputs, we currently can't deal with this.
+      if (User->getOpcode() == ISD::SELECT) {
+        if (User->getOperand(0) == Inputs[i])
+          return SDValue();
+      } else if (User->getOpcode() == ISD::SELECT_CC) {
+        if (User->getOperand(0) == Inputs[i] ||
+            User->getOperand(1) == Inputs[i])
+          return SDValue();
+      }
+    }
+  }
+
+  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
+    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
+                              UE = PromOps[i].getNode()->use_end();
+         UI != UE; ++UI) {
+      SDNode *User = *UI;
+      if (User != N && !Visited.count(User))
+        return SDValue();
+
+      // Make sure that we're not going to promote the non-output-value
+      // operand(s) of SELECT or SELECT_CC.
+      // FIXME: Although we could sometimes handle this, and it does occur in
+      // practice that one of the condition inputs to the select is also one of
+      // the outputs, we currently can't deal with this.
+      if (User->getOpcode() == ISD::SELECT) {
+        if (User->getOperand(0) == PromOps[i])
+          return SDValue();
+      } else if (User->getOpcode() == ISD::SELECT_CC) {
+        if (User->getOperand(0) == PromOps[i] ||
+            User->getOperand(1) == PromOps[i])
+          return SDValue();
+      }
+    }
+  }
+
+  // Replace all inputs with the extension operand.
+  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+    // Constants may have users outside the cluster of to-be-promoted nodes,
+    // and so we need to replace those as we do the promotions.
+    if (isa<ConstantSDNode>(Inputs[i]))
+      continue;
+    else
+      DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); 
+  }
+
+  // Replace all operations (these are all the same, but have a different
+  // (i1) return type). DAG.getNode will validate that the types of
+  // a binary operator match, so go through the list in reverse so that
+  // we've likely promoted both operands first. Any intermediate truncations or
+  // extensions disappear.
+  while (!PromOps.empty()) {
+    SDValue PromOp = PromOps.back();
+    PromOps.pop_back();
+
+    if (PromOp.getOpcode() == ISD::TRUNCATE ||
+        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
+        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
+        PromOp.getOpcode() == ISD::ANY_EXTEND) {
+      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
+          PromOp.getOperand(0).getValueType() != MVT::i1) {
+        // The operand is not yet ready (see comment below).
+        PromOps.insert(PromOps.begin(), PromOp);
+        continue;
+      }
+
+      SDValue RepValue = PromOp.getOperand(0);
+      if (isa<ConstantSDNode>(RepValue))
+        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
+
+      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
+      continue;
+    }
+
+    unsigned C;  // Index of the first of the two to-be-promoted operands.
+    switch (PromOp.getOpcode()) {
+    default:             C = 0; break;
+    case ISD::SELECT:    C = 1; break;
+    case ISD::SELECT_CC: C = 2; break;
+    }
+
+    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
+         PromOp.getOperand(C).getValueType() != MVT::i1) ||
+        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
+         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
+      // The to-be-promoted operands of this node have not yet been
+      // promoted (this should be rare because we're going through the
+      // list backward, but if one of the operands has several users in
+      // this cluster of to-be-promoted nodes, it is possible).
+      PromOps.insert(PromOps.begin(), PromOp);
+      continue;
+    }
+
+    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
+                                PromOp.getNode()->op_end());
+
+    // If there are any constant inputs, make sure they're replaced now.
+    for (unsigned i = 0; i < 2; ++i)
+      if (isa<ConstantSDNode>(Ops[C+i]))
+        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
+
+    DAG.ReplaceAllUsesOfValueWith(PromOp,
+      DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
+  }
+
+  // Now we're left with the initial truncation itself.
+  if (N->getOpcode() == ISD::TRUNCATE)
+    return N->getOperand(0);
+
+  // Otherwise, this is a comparison. The operands to be compared have just
+  // changed type (to i1), but everything else is the same.
+  return SDValue(N, 0);
+}
+
+SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
+                                                  DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc dl(N);
+
+  // If we're tracking CR bits, we need to be careful that we don't have:
+  //   zext(binary-ops(trunc(x), trunc(y)))
+  // or
+  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...))
+  // such that we're unnecessarily moving things into CR bits that can more
+  // efficiently stay in GPRs. Note that if we're not certain that the high
+  // bits are set as required by the final extension, we still may need to do
+  // some masking to get the proper behavior.
+
+  // This same functionality is important on PPC64 when dealing with
+  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
+  // the return values of functions. Because it is so similar, it is handled
+  // here as well.
+
+  if (N->getValueType(0) != MVT::i32 &&
+      N->getValueType(0) != MVT::i64)
+    return SDValue();
+
+  if (!((N->getOperand(0).getValueType() == MVT::i1 &&
+        PPCSubTarget.useCRBits()) ||
+       (N->getOperand(0).getValueType() == MVT::i32 &&
+        PPCSubTarget.isPPC64())))
+    return SDValue();
+
+  if (N->getOperand(0).getOpcode() != ISD::AND &&
+      N->getOperand(0).getOpcode() != ISD::OR  &&
+      N->getOperand(0).getOpcode() != ISD::XOR &&
+      N->getOperand(0).getOpcode() != ISD::SELECT &&
+      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
+    return SDValue();
+
+  SmallVector<SDValue, 4> Inputs;
+  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
+  SmallPtrSet<SDNode *, 16> Visited;
+
+  // Visit all inputs, collect all binary operations (and, or, xor and
+  // select) that are all fed by truncations. 
+  while (!BinOps.empty()) {
+    SDValue BinOp = BinOps.back();
+    BinOps.pop_back();
+
+    if (!Visited.insert(BinOp.getNode()))
+      continue;
+
+    PromOps.push_back(BinOp);
+
+    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
+      // The condition of the select is not promoted.
+      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
+        continue;
+      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
+        continue;
+
+      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
+          isa<ConstantSDNode>(BinOp.getOperand(i))) {
+        Inputs.push_back(BinOp.getOperand(i)); 
+      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
+                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
+                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
+                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
+                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
+        BinOps.push_back(BinOp.getOperand(i));
+      } else {
+        // We have an input that is not a truncation or another binary
+        // operation; we'll abort this transformation.
+        return SDValue();
+      }
+    }
+  }
+
+  // Make sure that this is a self-contained cluster of operations (which
+  // is not quite the same thing as saying that everything has only one
+  // use).
+  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+    if (isa<ConstantSDNode>(Inputs[i]))
+      continue;
+
+    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
+                              UE = Inputs[i].getNode()->use_end();
+         UI != UE; ++UI) {
+      SDNode *User = *UI;
+      if (User != N && !Visited.count(User))
+        return SDValue();
+
+      // Make sure that we're not going to promote the non-output-value
+      // operand(s) of SELECT or SELECT_CC.
+      // FIXME: Although we could sometimes handle this, and it does occur in
+      // practice that one of the condition inputs to the select is also one of
+      // the outputs, we currently can't deal with this.
+      if (User->getOpcode() == ISD::SELECT) {
+        if (User->getOperand(0) == Inputs[i])
+          return SDValue();
+      } else if (User->getOpcode() == ISD::SELECT_CC) {
+        if (User->getOperand(0) == Inputs[i] ||
+            User->getOperand(1) == Inputs[i])
+          return SDValue();
+      }
+    }
+  }
+
+  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
+    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
+                              UE = PromOps[i].getNode()->use_end();
+         UI != UE; ++UI) {
+      SDNode *User = *UI;
+      if (User != N && !Visited.count(User))
+        return SDValue();
+
+      // Make sure that we're not going to promote the non-output-value
+      // operand(s) of SELECT or SELECT_CC.
+      // FIXME: Although we could sometimes handle this, and it does occur in
+      // practice that one of the condition inputs to the select is also one of
+      // the outputs, we currently can't deal with this.
+      if (User->getOpcode() == ISD::SELECT) {
+        if (User->getOperand(0) == PromOps[i])
+          return SDValue();
+      } else if (User->getOpcode() == ISD::SELECT_CC) {
+        if (User->getOperand(0) == PromOps[i] ||
+            User->getOperand(1) == PromOps[i])
+          return SDValue();
+      }
+    }
+  }
+
+  unsigned PromBits = N->getOperand(0).getValueSizeInBits();  // Pre-ext width.
+  bool ReallyNeedsExt = false;
+  if (N->getOpcode() != ISD::ANY_EXTEND) {
+    // If all of the inputs are not already sign/zero extended, then
+    // we'll still need to do that at the end.
+    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+      if (isa<ConstantSDNode>(Inputs[i]))
+        continue;
+
+      unsigned OpBits =
+        Inputs[i].getOperand(0).getValueSizeInBits();
+      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
+
+      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
+           !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
+                                  APInt::getHighBitsSet(OpBits,
+                                                        OpBits-PromBits))) ||
+          (N->getOpcode() == ISD::SIGN_EXTEND &&
+           DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
+             (OpBits-(PromBits-1)))) {
+        ReallyNeedsExt = true;
+        break;
+      }
+    }
+  }
+
+  // Replace all inputs, either with the truncation operand, or a
+  // truncation or extension to the final output type.
+  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+    // Constant inputs need to be replaced with the to-be-promoted nodes that
+    // use them because they might have users outside of the cluster of
+    // promoted nodes.
+    if (isa<ConstantSDNode>(Inputs[i]))
+      continue;
+
+    SDValue InSrc = Inputs[i].getOperand(0);
+    if (Inputs[i].getValueType() == N->getValueType(0))
+      DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
+    else if (N->getOpcode() == ISD::SIGN_EXTEND)
+      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+        DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
+    else if (N->getOpcode() == ISD::ZERO_EXTEND)
+      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+        DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
+    else
+      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+        DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
+  }
+
+  // Replace all operations (these are all the same, but have a different
+  // (promoted) return type). DAG.getNode will validate that the types of
+  // a binary operator match, so go through the list in reverse so that
+  // we've likely promoted both operands first.
+  while (!PromOps.empty()) {
+    SDValue PromOp = PromOps.back();
+    PromOps.pop_back();
+
+    unsigned C;  // Index of the first of the two to-be-promoted operands.
+    switch (PromOp.getOpcode()) {
+    default:             C = 0; break;
+    case ISD::SELECT:    C = 1; break;
+    case ISD::SELECT_CC: C = 2; break;
+    }
+
+    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
+         PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
+        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
+         PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
+      // The to-be-promoted operands of this node have not yet been
+      // promoted (this should be rare because we're going through the
+      // list backward, but if one of the operands has several users in
+      // this cluster of to-be-promoted nodes, it is possible).
+      PromOps.insert(PromOps.begin(), PromOp);
+      continue;
+    }
+
+    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
+                                PromOp.getNode()->op_end());
+
+    // If this node has constant inputs, then they'll need to be promoted here.
+    for (unsigned i = 0; i < 2; ++i) {
+      if (!isa<ConstantSDNode>(Ops[C+i]))
+        continue;
+      if (Ops[C+i].getValueType() == N->getValueType(0))
+        continue;
+
+      if (N->getOpcode() == ISD::SIGN_EXTEND)
+        Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+      else if (N->getOpcode() == ISD::ZERO_EXTEND)
+        Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+      else
+        Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+    }
+
+    DAG.ReplaceAllUsesOfValueWith(PromOp,
+      DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
+  }
+
+  // Now we're left with the initial extension itself.
+  if (!ReallyNeedsExt)
+    return N->getOperand(0);
+
+  // To zero extend, just mask off everything except for the first bit (in the
+  // i1 case).
+  if (N->getOpcode() == ISD::ZERO_EXTEND)
+    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
+                       DAG.getConstant(APInt::getLowBitsSet(
+                                         N->getValueSizeInBits(0), PromBits),
+                                       N->getValueType(0)));
+
+  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
+         "Invalid extension type");
+  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0));
+  SDValue ShiftCst =
+    DAG.getConstant(N->getValueSizeInBits(0)-PromBits, ShiftAmountTy);
+  return DAG.getNode(ISD::SRA, dl, N->getValueType(0), 
+                     DAG.getNode(ISD::SHL, dl, N->getValueType(0),
+                                 N->getOperand(0), ShiftCst), ShiftCst);
+}
+
  SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
    const TargetMachine &TM = getTargetMachine();
    SelectionDAG &DAG = DCI.DAG;
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
    switch (N->getOpcode()) {
    default: break;
    case PPCISD::SHL:
@@ -6822,6 +7861,14 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
          return N->getOperand(0);
      }
      break;
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND: 
+    return DAGCombineExtBoolTrunc(N, DCI);
+  case ISD::TRUNCATE:
+  case ISD::SETCC:
+  case ISD::SELECT_CC:
+    return DAGCombineTruncBoolExt(N, DCI);
    case ISD::FDIV: {
      assert(TM.Options.UnsafeFPMath &&
             "Reciprocal estimates require UnsafeFPMath");
@@ -6829,7 +7876,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
      if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
        SDValue RV =
          DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI);
-      if (RV.getNode() != 0) {
+      if (RV.getNode()) {
          DCI.AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
                             N->getOperand(0), RV);
@@ -6839,9 +7886,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
        SDValue RV =
          DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
                                   DCI);
-      if (RV.getNode() != 0) {
+      if (RV.getNode()) {
          DCI.AddToWorklist(RV.getNode());
-        RV = DAG.getNode(ISD::FP_EXTEND, N->getOperand(1).getDebugLoc(),
+        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)),
                           N->getValueType(0), RV);
          DCI.AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
@@ -6852,9 +7899,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
        SDValue RV =
          DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
                                   DCI);
-      if (RV.getNode() != 0) {
+      if (RV.getNode()) {
          DCI.AddToWorklist(RV.getNode());
-        RV = DAG.getNode(ISD::FP_ROUND, N->getOperand(1).getDebugLoc(),
+        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)),
                           N->getValueType(0), RV,
                           N->getOperand(1).getOperand(1));
          DCI.AddToWorklist(RV.getNode());
@@ -6864,7 +7911,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
      }
  
      SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
-    if (RV.getNode() != 0) {
+    if (RV.getNode()) {
        DCI.AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
                           N->getOperand(0), RV);
@@ -6879,11 +7926,31 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
      // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
      // reciprocal sqrt.
      SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI);
-    if (RV.getNode() != 0) {
+    if (RV.getNode()) {
        DCI.AddToWorklist(RV.getNode());
        RV = DAGCombineFastRecip(RV, DCI);
-      if (RV.getNode() != 0)
+      if (RV.getNode()) {
+       // Unfortunately, RV is now NaN if the input was exactly 0. Select out
+       // this case and force the answer to 0.
+
+        EVT VT = RV.getValueType();
+
+        SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType());
+        if (VT.isVector()) {
+          assert(VT.getVectorNumElements() == 4 && "Unknown vector type");
+          Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero);
+        }
+
+        SDValue ZeroCmp =
+          DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT),
+                       N->getOperand(0), Zero, ISD::SETEQ);
+        DCI.AddToWorklist(ZeroCmp.getNode());
+        DCI.AddToWorklist(RV.getNode());
+
+        RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT,
+                         ZeroCmp, Zero, RV);
          return RV;
+      }
      }
  
      }
@@ -6940,7 +8007,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
        };
  
        Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
-              DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
+              DAG.getVTList(MVT::Other), Ops,
                cast<StoreSDNode>(N)->getMemoryVT(),
                cast<StoreSDNode>(N)->getMemOperand());
        DCI.AddToWorklist(Val.getNode());
@@ -6967,10 +8034,163 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
        };
        return
          DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
-                                Ops, array_lengthof(Ops),
-                                cast<StoreSDNode>(N)->getMemoryVT(),
+                                Ops, cast<StoreSDNode>(N)->getMemoryVT(),
                                  cast<StoreSDNode>(N)->getMemOperand());
      }
+    break;
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(N);
+    EVT VT = LD->getValueType(0);
+    Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+    unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
+    if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
+        TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
+        (VT == MVT::v16i8 || VT == MVT::v8i16 ||
+         VT == MVT::v4i32 || VT == MVT::v4f32) &&
+        LD->getAlignment() < ABIAlignment) {
+      // This is a type-legal unaligned Altivec load.
+      SDValue Chain = LD->getChain();
+      SDValue Ptr = LD->getBasePtr();
+
+      // This implements the loading of unaligned vectors as described in
+      // the venerable Apple Velocity Engine overview. Specifically:
+      // https://developer.apple.com/hardwaredrivers/ve/alignment.html
+      // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
+      //
+      // The general idea is to expand a sequence of one or more unaligned
+      // loads into an alignment-based permutation-control instruction (lvsl),
+      // a series of regular vector loads (which always truncate their
+      // input address to an aligned address), and a series of permutations.
+      // The results of these permutations are the requested loaded values.
+      // The trick is that the last "extra" load is not taken from the address
+      // you might suspect (sizeof(vector) bytes after the last requested
+      // load), but rather sizeof(vector) - 1 bytes after the last
+      // requested vector. The point of this is to avoid a page fault if the
+      // base address happened to be aligned. This works because if the base
+      // address is aligned, then adding less than a full vector length will
+      // cause the last vector in the sequence to be (re)loaded. Otherwise,
+      // the next vector will be fetched as you might suspect was necessary.
+
+      // We might be able to reuse the permutation generation from
+      // a different base address offset from this one by an aligned amount.
+      // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
+      // optimization later.
+      SDValue PermCntl = BuildIntrinsicOp(Intrinsic::ppc_altivec_lvsl, Ptr,
+                                          DAG, dl, MVT::v16i8);
+
+      // Refine the alignment of the original load (a "new" load created here
+      // which was identical to the first except for the alignment would be
+      // merged with the existing node regardless).
+      MachineFunction &MF = DAG.getMachineFunction();
+      MachineMemOperand *MMO =
+        MF.getMachineMemOperand(LD->getPointerInfo(),
+                                LD->getMemOperand()->getFlags(),
+                                LD->getMemoryVT().getStoreSize(),
+                                ABIAlignment);
+      LD->refineAlignment(MMO);
+      SDValue BaseLoad = SDValue(LD, 0);
+
+      // Note that the value of IncOffset (which is provided to the next
+      // load's pointer info offset value, and thus used to calculate the
+      // alignment), and the value of IncValue (which is actually used to
+      // increment the pointer value) are different! This is because we
+      // require the next load to appear to be aligned, even though it
+      // is actually offset from the base pointer by a lesser amount.
+      int IncOffset = VT.getSizeInBits() / 8;
+      int IncValue = IncOffset;
+
+      // Walk (both up and down) the chain looking for another load at the real
+      // (aligned) offset (the alignment of the other load does not matter in
+      // this case). If found, then do not use the offset reduction trick, as
+      // that will prevent the loads from being later combined (as they would
+      // otherwise be duplicates).
+      if (!findConsecutiveLoad(LD, DAG))
+        --IncValue;
+
+      SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
+      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+
+      SDValue ExtraLoad =
+        DAG.getLoad(VT, dl, Chain, Ptr,
+                    LD->getPointerInfo().getWithOffset(IncOffset),
+                    LD->isVolatile(), LD->isNonTemporal(),
+                    LD->isInvariant(), ABIAlignment);
+
+      SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+        BaseLoad.getValue(1), ExtraLoad.getValue(1));
+
+      if (BaseLoad.getValueType() != MVT::v4i32)
+        BaseLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, BaseLoad);
+
+      if (ExtraLoad.getValueType() != MVT::v4i32)
+        ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad);
+
+      SDValue Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+                                      BaseLoad, ExtraLoad, PermCntl, DAG, dl);
+
+      if (VT != MVT::v4i32)
+        Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
+
+      // Now we need to be really careful about how we update the users of the
+      // original load. We cannot just call DCI.CombineTo (or
+      // DAG.ReplaceAllUsesWith for that matter), because the load still has
+      // uses created here (the permutation for example) that need to stay.
+      SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+      while (UI != UE) {
+        SDUse &Use = UI.getUse();
+        SDNode *User = *UI;
+        // Note: BaseLoad is checked here because it might not be N, but a
+        // bitcast of N.
+        if (User == Perm.getNode() || User == BaseLoad.getNode() ||
+            User == TF.getNode() || Use.getResNo() > 1) {
+          ++UI;
+          continue;
+        }
+
+        SDValue To = Use.getResNo() ? TF : Perm;
+        ++UI;
+
+        SmallVector<SDValue, 8> Ops;
+        for (SDNode::op_iterator O = User->op_begin(),
+             OE = User->op_end(); O != OE; ++O) {
+          if (*O == Use)
+            Ops.push_back(To);
+          else
+            Ops.push_back(*O);
+        }
+
+        DAG.UpdateNodeOperands(User, Ops.data(), Ops.size());
+      }
+
+      return SDValue(N, 0);
+    }
+    }
+    break;
+  case ISD::INTRINSIC_WO_CHAIN:
+    if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() ==
+          Intrinsic::ppc_altivec_lvsl &&
+        N->getOperand(1)->getOpcode() == ISD::ADD) {
+      SDValue Add = N->getOperand(1);
+
+      if (DAG.MaskedValueIsZero(Add->getOperand(1),
+            APInt::getAllOnesValue(4 /* 16 byte alignment */).zext(
+              Add.getValueType().getScalarType().getSizeInBits()))) {
+        SDNode *BasePtr = Add->getOperand(0).getNode();
+        for (SDNode::use_iterator UI = BasePtr->use_begin(),
+             UE = BasePtr->use_end(); UI != UE; ++UI) {
+          if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+              cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
+                Intrinsic::ppc_altivec_lvsl) {
+            // We've found another LVSL, and this address is an aligned
+            // multiple of that one. The results will be the same, so use the
+            // one we've just found instead.
+
+            return SDValue(*UI, 0);
+          }
+        }
+      }
+    }
+
      break;
    case ISD::BSWAP:
      // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
@@ -6992,7 +8212,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
          DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
                                  DAG.getVTList(N->getValueType(0) == MVT::i64 ?
                                                MVT::i64 : MVT::i32, MVT::Other),
-                                Ops, 3, LD->getMemoryVT(), LD->getMemOperand());
+                                Ops, LD->getMemoryVT(), LD->getMemOperand());
  
        // If this is an i16 load, insert the truncate.
        SDValue ResVal = BSLoad;
@@ -7022,7 +8242,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
          !N->getOperand(2).hasOneUse()) {
  
        // Scan all of the users of the LHS, looking for VCMPo's that match.
-      SDNode *VCMPoNode = 0;
+      SDNode *VCMPoNode = nullptr;
  
        SDNode *LHSN = N->getOperand(0).getNode();
        for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
@@ -7043,9 +8263,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
        // Look at the (necessarily single) use of the flag value.  If it has a
        // chain, this transformation is more complex.  Note that multiple things
        // could use the value result, which we should ignore.
-      SDNode *FlagUser = 0;
+      SDNode *FlagUser = nullptr;
        for (SDNode::use_iterator UI = VCMPoNode->use_begin();
-           FlagUser == 0; ++UI) {
+           FlagUser == nullptr; ++UI) {
          assert(UI != VCMPoNode->use_end() && "Didn't find user!");
          SDNode *User = *UI;
          for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
@@ -7056,20 +8276,72 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
          }
        }
  
-      // If the user is a MFCR instruction, we know this is safe.  Otherwise we
-      // give up for right now.
-      if (FlagUser->getOpcode() == PPCISD::MFCR)
+      // If the user is a MFOCRF instruction, we know this is safe.
+      // Otherwise we give up for right now.
+      if (FlagUser->getOpcode() == PPCISD::MFOCRF)
          return SDValue(VCMPoNode, 0);
      }
      break;
    }
+  case ISD::BRCOND: {
+    SDValue Cond = N->getOperand(1);
+    SDValue Target = N->getOperand(2);
+ 
+    if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+        cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
+          Intrinsic::ppc_is_decremented_ctr_nonzero) {
+
+      // We now need to make the intrinsic dead (it cannot be instruction
+      // selected).
+      DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
+      assert(Cond.getNode()->hasOneUse() &&
+             "Counter decrement has more than one use");
+
+      return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
+                         N->getOperand(0), Target);
+    }
+  }
+  break;
    case ISD::BR_CC: {
      // If this is a branch on an altivec predicate comparison, lower this so
-    // that we don't have to do a MFCR: instead, branch directly on CR6.  This
+    // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
      // lowering is done pre-legalize, because the legalizer lowers the predicate
      // compare down to code that is difficult to reassemble.
      ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
      SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
+
+    // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
+    // value. If so, pass through the AND to get to the intrinsic.
+    if (LHS.getOpcode() == ISD::AND &&
+        LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+        cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
+          Intrinsic::ppc_is_decremented_ctr_nonzero &&
+        isa<ConstantSDNode>(LHS.getOperand(1)) &&
+        !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()->
+          isZero())
+      LHS = LHS.getOperand(0);
+
+    if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+        cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
+          Intrinsic::ppc_is_decremented_ctr_nonzero &&
+        isa<ConstantSDNode>(RHS)) {
+      assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
+             "Counter decrement comparison is not EQ or NE");
+
+      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
+      bool isBDNZ = (CC == ISD::SETEQ && Val) ||
+                    (CC == ISD::SETNE && !Val);
+
+      // We now need to make the intrinsic dead (it cannot be instruction
+      // selected).
+      DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
+      assert(LHS.getNode()->hasOneUse() &&
+             "Counter decrement has more than one use");
+
+      return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
+                         N->getOperand(0), N->getOperand(4));
+    }
+
      int CompareOpc;
      bool isDot;
  
@@ -7098,7 +8370,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
          DAG.getConstant(CompareOpc, MVT::i32)
        };
        EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
-      SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
+      SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
  
        // Unpack the result based on how the target uses it.
        PPC::Predicate CompOpc;
@@ -7194,6 +8466,11 @@ PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
        // suboptimal.
        return C_Memory;
      }
+  } else if (Constraint == "wc") { // individual CR bits.
+    return C_RegisterClass;
+  } else if (Constraint == "wa" || Constraint == "wd" ||
+             Constraint == "wf" || Constraint == "ws") {
+    return C_RegisterClass; // VSX registers.
    }
    return TargetLowering::getConstraintType(Constraint);
  }
@@ -7208,10 +8485,21 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
    Value *CallOperandVal = info.CallOperandVal;
      // If we don't have a value, we can't do a match,
      // but allow it at the lowest weight.
-  if (CallOperandVal == NULL)
+  if (!CallOperandVal)
      return CW_Default;
    Type *type = CallOperandVal->getType();
+
    // Look at the constraint type.
+  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
+    return CW_Register; // an individual CR bit.
+  else if ((StringRef(constraint) == "wa" ||
+            StringRef(constraint) == "wd" ||
+            StringRef(constraint) == "wf") &&
+           type->isVectorTy())
+    return CW_Register;
+  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
+    return CW_Register;
+
    switch (*constraint) {
    default:
      weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
@@ -7244,7 +8532,7 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
  
  std::pair<unsigned, const TargetRegisterClass*>
  PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
-                                                EVT VT) const {
+                                                MVT VT) const {
    if (Constraint.size() == 1) {
      // GCC RS6000 Constraint Letters
      switch (Constraint[0]) {
@@ -7267,9 +8555,33 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
      case 'y':   // crrc
        return std::make_pair(0U, &PPC::CRRCRegClass);
      }
-  }
-
-  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+  } else if (Constraint == "wc") { // an individual CR bit.
+    return std::make_pair(0U, &PPC::CRBITRCRegClass);
+  } else if (Constraint == "wa" || Constraint == "wd" ||
+             Constraint == "wf") {
+    return std::make_pair(0U, &PPC::VSRCRegClass);
+  } else if (Constraint == "ws") {
+    return std::make_pair(0U, &PPC::VSFRCRegClass);
+  }
+
+  std::pair<unsigned, const TargetRegisterClass*> R =
+    TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+
+  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
+  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
+  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
+  // register.
+  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
+  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
+  if (R.first && VT == MVT::i64 && PPCSubTarget.isPPC64() &&
+      PPC::GPRCRegClass.contains(R.first)) {
+    const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+    return std::make_pair(TRI->getMatchingSuperReg(R.first,
+                            PPC::sub_32, &PPC::G8RCRegClass),
+                          &PPC::G8RCRegClass);
+  }
+
+  return R;
  }
  
  
@@ -7279,7 +8591,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                       std::string &Constraint,
                                                       std::vector<SDValue>&Ops,
                                                       SelectionDAG &DAG) const {
-  SDValue Result(0,0);
+  SDValue Result;
  
    // Only support length 1 constraints.
    if (Constraint.length() > 1) return;
@@ -7379,25 +8691,16 @@ bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
    return true;
  }
  
-/// isLegalAddressImmediate - Return true if the integer value can be used
-/// as the offset of the target addressing mode for load / store of the
-/// given type.
-bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,Type *Ty) const{
-  // PPC allows a sign-extended 16-bit immediate field.
-  return (V > -(1 << 16) && V < (1 << 16)-1);
-}
-
-bool PPCTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const {
-  return false;
-}
-
  SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                             SelectionDAG &DAG) const {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineFrameInfo *MFI = MF.getFrameInfo();
    MFI->setReturnAddressIsTaken(true);
  
-  DebugLoc dl = Op.getDebugLoc();
+  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+    return SDValue();
+
+  SDLoc dl(Op);
    unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  
    // Make sure the function does not optimize away the store of the RA to
@@ -7427,7 +8730,7 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
  
  SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                            SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -7484,7 +8787,44 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
    }
  }
  
+/// \brief Returns true if it is beneficial to convert a load of a constant
+/// to just the constant itself.
+bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
+                                                          Type *Ty) const {
+  assert(Ty->isIntegerTy());
+
+  unsigned BitSize = Ty->getPrimitiveSizeInBits();
+  if (BitSize == 0 || BitSize > 64)
+    return false;
+  return true;
+}
+
+bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
+  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+    return false;
+  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
+  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
+  return NumBits1 == 64 && NumBits2 == 32;
+}
+
+bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+  if (!VT1.isInteger() || !VT2.isInteger())
+    return false;
+  unsigned NumBits1 = VT1.getSizeInBits();
+  unsigned NumBits2 = VT2.getSizeInBits();
+  return NumBits1 == 64 && NumBits2 == 32;
+}
+
+bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+  return isInt<16>(Imm) || isUInt<16>(Imm);
+}
+
+bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
+  return isInt<16>(Imm) || isUInt<16>(Imm);
+}
+
  bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+                                                      unsigned,
                                                        bool *Fast) const {
    if (DisablePPCUnaligned)
      return false;
@@ -7498,8 +8838,14 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
    if (!VT.isSimple())
      return false;
  
-  if (VT.getSimpleVT().isVector())
-    return false;
+  if (VT.getSimpleVT().isVector()) {
+    if (PPCSubTarget.hasVSX()) {
+      if (VT != MVT::v2f64 && VT != MVT::v2i64)
+        return false;
+    } else {
+      return false;
+    }
+  }
  
    if (VT == MVT::ppcf128)
      return false;
@@ -7510,18 +8856,15 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
    return true;
  }
  
-/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
-/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
-/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
-/// is expanded to mul + add.
-bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
+bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+  VT = VT.getScalarType();
+
    if (!VT.isSimple())
      return false;
  
    switch (VT.getSimpleVT().SimpleTy) {
    case MVT::f32:
    case MVT::f64:
-  case MVT::v4f32:
      return true;
    default:
      break;
@@ -7530,10 +8873,25 @@ bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
    return false;
  }
  
+bool
+PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
+                     EVT VT , unsigned DefinedValues) const {
+  if (VT == MVT::v2i64)
+    return false;
+
+  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
+}
+
  Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
-  if (DisableILPPref)
+  if (DisableILPPref || PPCSubTarget.enableMachineScheduler())
      return TargetLowering::getSchedulingPreference(N);
  
    return Sched::ILP;
  }
  
+// Create a fast isel object.
+FastISel *
+PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
+                                  const TargetLibraryInfo *LibInfo) const {
+  return PPC::createFastISel(FuncInfo, LibInfo);
+}