Fixup PPC Darwin i1 argument handling

[oota-llvm.git] / lib / Target / PowerPC / PPCISelLowering.cpp
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index f47376cc51b90ead7bcc66060739259caabcf5f6..680112da9a8a6a79fd442ac1b71855200f20e29a 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -37,21 +37,6 @@
  #include "llvm/Target/TargetOptions.h"
  using namespace llvm;
  
-static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                       CCValAssign::LocInfo &LocInfo,
-                                       ISD::ArgFlagsTy &ArgFlags,
-                                       CCState &State);
-static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
-                                              MVT &LocVT,
-                                              CCValAssign::LocInfo &LocInfo,
-                                              ISD::ArgFlagsTy &ArgFlags,
-                                              CCState &State);
-static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
-                                                MVT &LocVT,
-                                                CCValAssign::LocInfo &LocInfo,
-                                                ISD::ArgFlagsTy &ArgFlags,
-                                                CCState &State);
-
  static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
  cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
  
@@ -61,6 +46,9 @@ cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hi
  static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
  cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
  
+// FIXME: Remove this once the bug has been fixed!
+extern cl::opt<bool> ANDIGlueBug;
+
  static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
    if (TM.getSubtargetImpl()->isDarwin())
      return new TargetLoweringObjectFileMachO();
@@ -74,8 +62,6 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
  PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
    const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
-  PPCRegInfo = TM.getRegisterInfo();
-  PPCII = TM.getInstrInfo();
  
    setPow2DivIsCheap();
  
@@ -111,6 +97,39 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  
+  if (Subtarget->useCRBits()) {
+    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+    if (isPPC64 || Subtarget->hasFPCVT()) {
+      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
+      AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
+                         isPPC64 ? MVT::i64 : MVT::i32);
+      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
+      AddPromotedToType (ISD::UINT_TO_FP, MVT::i1, 
+                         isPPC64 ? MVT::i64 : MVT::i32);
+    } else {
+      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
+      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
+    }
+
+    // PowerPC does not support direct load / store of condition registers
+    setOperationAction(ISD::LOAD, MVT::i1, Custom);
+    setOperationAction(ISD::STORE, MVT::i1, Custom);
+
+    // FIXME: Remove this once the ANDI glue bug is fixed:
+    if (ANDIGlueBug)
+      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
+
+    setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+    setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+    setTruncStoreAction(MVT::i64, MVT::i1, Expand);
+    setTruncStoreAction(MVT::i32, MVT::i1, Expand);
+    setTruncStoreAction(MVT::i16, MVT::i1, Expand);
+    setTruncStoreAction(MVT::i8, MVT::i1, Expand);
+
+    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
+  }
+
    // This is used in the ppcf128->int sequence.  Note it has different semantics
    // from FP_ROUND:  that rounds to nearest, this rounds to zero.
    setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
@@ -166,28 +185,24 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
          Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
      setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  
-  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
-  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+  if (Subtarget->hasFCPSGN()) {
+    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
+    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
+  } else {
+    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+  }
  
    if (Subtarget->hasFPRND()) {
      setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
      setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+    setOperationAction(ISD::FROUND, MVT::f64, Legal);
  
      setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
      setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
      setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
-
-    // frin does not implement "ties to even." Thus, this is safe only in
-    // fast-math mode.
-    if (TM.Options.UnsafeFPMath) {
-      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
-      setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
-
-      // These need to set FE_INEXACT, and use a custom inserter.
-      setOperationAction(ISD::FRINT, MVT::f64, Legal);
-      setOperationAction(ISD::FRINT, MVT::f32, Legal);
-    }
+    setOperationAction(ISD::FROUND, MVT::f32, Legal);
    }
  
    // PowerPC does not have BSWAP, CTPOP or CTTZ
@@ -212,21 +227,25 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    setOperationAction(ISD::ROTR, MVT::i32   , Expand);
    setOperationAction(ISD::ROTR, MVT::i64   , Expand);
  
-  // PowerPC does not have Select
-  setOperationAction(ISD::SELECT, MVT::i32, Expand);
-  setOperationAction(ISD::SELECT, MVT::i64, Expand);
-  setOperationAction(ISD::SELECT, MVT::f32, Expand);
-  setOperationAction(ISD::SELECT, MVT::f64, Expand);
+  if (!Subtarget->useCRBits()) {
+    // PowerPC does not have Select
+    setOperationAction(ISD::SELECT, MVT::i32, Expand);
+    setOperationAction(ISD::SELECT, MVT::i64, Expand);
+    setOperationAction(ISD::SELECT, MVT::f32, Expand);
+    setOperationAction(ISD::SELECT, MVT::f64, Expand);
+  }
  
    // PowerPC wants to turn select_cc of FP into fsel when possible.
    setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
  
    // PowerPC wants to optimize integer setcc a bit
-  setOperationAction(ISD::SETCC, MVT::i32, Custom);
+  if (!Subtarget->useCRBits())
+    setOperationAction(ISD::SETCC, MVT::i32, Custom);
  
    // PowerPC does not have BRCOND which requires SetCC
-  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+  if (!Subtarget->useCRBits())
+    setOperationAction(ISD::BRCOND, MVT::Other, Expand);
  
    setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
  
@@ -245,11 +264,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    // We cannot sextinreg(i1).  Expand to shifts.
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  
-  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
-  setOperationAction(ISD::EHSELECTION,   MVT::i64, Expand);
-  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
-  setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);
-
    // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
    // SjLj exception handling but a light-weight setjmp/longjmp replacement to
    // support continuation, user-level threading, and etc.. As a result, no
@@ -302,8 +316,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    } else
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
  
+  if (Subtarget->isSVR4ABI() && !isPPC64)
+    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
+    setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
+  else
+    setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
+
    // Use the default implementation.
-  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
    setOperationAction(ISD::VAEND             , MVT::Other, Expand);
    setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
    setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
@@ -414,6 +433,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
        setOperationAction(ISD::UDIV, VT, Expand);
        setOperationAction(ISD::UREM, VT, Expand);
        setOperationAction(ISD::FDIV, VT, Expand);
+      setOperationAction(ISD::FREM, VT, Expand);
        setOperationAction(ISD::FNEG, VT, Expand);
        setOperationAction(ISD::FSQRT, VT, Expand);
        setOperationAction(ISD::FLOG, VT, Expand);
@@ -465,7 +485,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
      setOperationAction(ISD::OR    , MVT::v4i32, Legal);
      setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
      setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
-    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
+    setOperationAction(ISD::SELECT, MVT::v4i32,
+                       Subtarget->useCRBits() ? Legal : Expand);
      setOperationAction(ISD::STORE , MVT::v4i32, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
@@ -508,6 +529,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
      setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
      setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
      setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
+
+    setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
+    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
    }
  
    if (Subtarget->has64BitSupport()) {
@@ -539,9 +563,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    setTargetDAGCombine(ISD::LOAD);
    setTargetDAGCombine(ISD::STORE);
    setTargetDAGCombine(ISD::BR_CC);
+  if (Subtarget->useCRBits())
+    setTargetDAGCombine(ISD::BRCOND);
    setTargetDAGCombine(ISD::BSWAP);
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  
+  setTargetDAGCombine(ISD::SIGN_EXTEND);
+  setTargetDAGCombine(ISD::ZERO_EXTEND);
+  setTargetDAGCombine(ISD::ANY_EXTEND);
+
+  if (Subtarget->useCRBits()) {
+    setTargetDAGCombine(ISD::TRUNCATE);
+    setTargetDAGCombine(ISD::SETCC);
+    setTargetDAGCombine(ISD::SELECT_CC);
+  }
+
    // Use reciprocal estimates.
    if (TM.Options.UnsafeFPMath) {
      setTargetDAGCombine(ISD::FDIV);
@@ -562,6 +598,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
      setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
    }
  
+  // With 32 condition bits, we don't need to sink (and duplicate) compares
+  // aggressively in CodeGenPrep.
+  if (Subtarget->useCRBits())
+    setHasMultipleConditionRegisters();
+
    setMinFunctionAlignment(2);
    if (PPCSubTarget.isDarwin())
      setPrefFunctionAlignment(4);
@@ -573,7 +614,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
  
    setInsertFencesForAtomic(true);
  
-  setSchedulingPreference(Sched::Hybrid);
+  if (Subtarget->enableMachineScheduler())
+    setSchedulingPreference(Sched::Source);
+  else
+    setSchedulingPreference(Sched::Hybrid);
  
    computeRegisterProperties();
  
@@ -592,24 +636,47 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    }
  }
  
+/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
+/// the desired ByVal argument alignment.
+static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
+                             unsigned MaxMaxAlign) {
+  if (MaxAlign == MaxMaxAlign)
+    return;
+  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
+      MaxAlign = 32;
+    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
+      MaxAlign = 16;
+  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+    unsigned EltAlign = 0;
+    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
+    if (EltAlign > MaxAlign)
+      MaxAlign = EltAlign;
+  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
+    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+      unsigned EltAlign = 0;
+      getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
+      if (EltAlign > MaxAlign)
+        MaxAlign = EltAlign;
+      if (MaxAlign == MaxMaxAlign)
+        break;
+    }
+  }
+}
+
  /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
  /// function arguments in the caller parameter area.
  unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
-  const TargetMachine &TM = getTargetMachine();
    // Darwin passes everything on 4 byte boundary.
-  if (TM.getSubtarget<PPCSubtarget>().isDarwin())
+  if (PPCSubTarget.isDarwin())
      return 4;
  
    // 16byte and wider vectors are passed on 16byte boundary.
-  if (VectorType *VTy = dyn_cast<VectorType>(Ty))
-    if (VTy->getBitWidth() >= 128)
-      return 16;
-
    // The rest is 8 on PPC64 and 4 on PPC32 boundary.
-   if (PPCSubTarget.isPPC64())
-     return 8;
-
-  return 4;
+  unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4;
+  if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX())
+    getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16);
+  return Align;
  }
  
  const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -643,7 +710,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
    case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
    case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
    case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
-  case PPCISD::MFCR:            return "PPCISD::MFCR";
+  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
    case PPCISD::VCMP:            return "PPCISD::VCMP";
    case PPCISD::VCMPo:           return "PPCISD::VCMPo";
    case PPCISD::LBRX:            return "PPCISD::LBRX";
@@ -661,6 +728,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
    case PPCISD::ADDIS_TOC_HA:    return "PPCISD::ADDIS_TOC_HA";
    case PPCISD::LD_TOC_L:        return "PPCISD::LD_TOC_L";
    case PPCISD::ADDI_TOC_L:      return "PPCISD::ADDI_TOC_L";
+  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
    case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
    case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
    case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
@@ -679,7 +747,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  
  EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
    if (!VT.isVector())
-    return MVT::i32;
+    return PPCSubTarget.useCRBits() ? MVT::i1 : MVT::i32;
    return VT.changeVectorElementTypeToInteger();
  }
  
@@ -1048,6 +1116,46 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
    return false;
  }
  
+// If we happen to be doing an i64 load or store into a stack slot that has
+// less than a 4-byte alignment, then the frame-index elimination may need to
+// use an indexed load or store instruction (because the offset may not be a
+// multiple of 4). The extra register needed to hold the offset comes from the
+// register scavenger, and it is possible that the scavenger will need to use
+// an emergency spill slot. As a result, we need to make sure that a spill slot
+// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
+// stack slot.
+static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
+  // FIXME: This does not handle the LWA case.
+  if (VT != MVT::i64)
+    return;
+
+  // NOTE: We'll exclude negative FIs here, which come from argument
+  // lowering, because there are no known test cases triggering this problem
+  // using packed structures (or similar). We can remove this exclusion if
+  // we find such a test case. The reason why this is so test-case driven is
+  // because this entire 'fixup' is only to prevent crashes (from the
+  // register scavenger) on not-really-valid inputs. For example, if we have:
+  //   %a = alloca i1
+  //   %b = bitcast i1* %a to i64*
+  //   store i64* a, i64 b
+  // then the store should really be marked as 'align 1', but is not. If it
+  // were marked as 'align 1' then the indexed form would have been
+  // instruction-selected initially, and the problem this 'fixup' is preventing
+  // won't happen regardless.
+  if (FrameIdx < 0)
+    return;
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  unsigned Align = MFI->getObjectAlignment(FrameIdx);
+  if (Align >= 4)
+    return;
+
+  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+  FuncInfo->setHasNonRISpills();
+}
+
  /// Returns true if the address N can be represented by a base register plus
  /// a signed 16-bit displacement [r+imm], and if it is not better
  /// represented as reg+reg.  If Aligned is true, only accept displacements
@@ -1069,6 +1177,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
        Disp = DAG.getTargetConstant(imm, N.getValueType());
        if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        } else {
          Base = N.getOperand(0);
        }
@@ -1133,9 +1242,10 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
    }
  
    Disp = DAG.getTargetConstant(0, getPointerTy());
-  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
-  else
+    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
+  } else
      Base = N;
    return true;      // [r+0]
  }
@@ -1253,8 +1363,8 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
  /// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
  static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
                                 unsigned &LoOpFlags, const GlobalValue *GV = 0) {
-  HiOpFlags = PPCII::MO_HA16;
-  LoOpFlags = PPCII::MO_LO16;
+  HiOpFlags = PPCII::MO_HA;
+  LoOpFlags = PPCII::MO_LO;
  
    // Don't use the pic base if not in PIC relocation model.  Or if we are on a
    // non-darwin platform.  We don't support PIC on other platforms yet.
@@ -1357,6 +1467,10 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
  SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  
+  // FIXME: TLS addresses currently use medium model code sequences,
+  // which is the most useful form.  Eventually support for small and
+  // large models could be added if users need it, at the cost of
+  // additional complexity.
    GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
    SDLoc dl(GA);
    const GlobalValue *GV = GA->getGlobal();
@@ -1367,26 +1481,29 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
  
    if (Model == TLSModel::LocalExec) {
      SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
-                                               PPCII::MO_TPREL16_HA);
+                                               PPCII::MO_TPREL_HA);
      SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
-                                               PPCII::MO_TPREL16_LO);
+                                               PPCII::MO_TPREL_LO);
      SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
                                       is64bit ? MVT::i64 : MVT::i32);
      SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
      return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
    }
  
-  if (!is64bit)
-    llvm_unreachable("only local-exec is currently supported for ppc32");
-
    if (Model == TLSModel::InitialExec) {
      SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
-    SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
-    SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
-                                     PtrVT, GOTReg, TGA);
+    SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+                                                PPCII::MO_TLS);
+    SDValue GOTPtr;
+    if (is64bit) {
+      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+      GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
+                           PtrVT, GOTReg, TGA);
+    } else
+      GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
      SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
-                                   PtrVT, TGA, TPOffsetHi);
-    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA);
+                                   PtrVT, TGA, GOTPtr);
+    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
    }
  
    if (Model == TLSModel::GeneralDynamic) {
@@ -1624,6 +1741,18 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
                       false, false, false, 0);
  }
  
+SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
+                                       const PPCSubtarget &Subtarget) const {
+  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
+
+  // We have to copy the entire va_list struct:
+  // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
+  return DAG.getMemcpy(Op.getOperand(0), Op,
+                       Op.getOperand(1), Op.getOperand(2),
+                       DAG.getConstant(12, MVT::i32), 8, false, true,
+                       MachinePointerInfo(), MachinePointerInfo());
+}
+
  SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
                                                    SelectionDAG &DAG) const {
    return Op.getOperand(0);
@@ -1771,18 +1900,24 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
  
  #include "PPCGenCallingConv.inc"
  
-static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                       CCValAssign::LocInfo &LocInfo,
-                                       ISD::ArgFlagsTy &ArgFlags,
-                                       CCState &State) {
+// Function whose sole purpose is to kill compiler warnings 
+// stemming from unused functions included from PPCGenCallingConv.inc.
+CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
+  return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
+}
+
+bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                      CCValAssign::LocInfo &LocInfo,
+                                      ISD::ArgFlagsTy &ArgFlags,
+                                      CCState &State) {
    return true;
  }
  
-static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
-                                              MVT &LocVT,
-                                              CCValAssign::LocInfo &LocInfo,
-                                              ISD::ArgFlagsTy &ArgFlags,
-                                              CCState &State) {
+bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+                                             MVT &LocVT,
+                                             CCValAssign::LocInfo &LocInfo,
+                                             ISD::ArgFlagsTy &ArgFlags,
+                                             CCState &State) {
    static const uint16_t ArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
@@ -1805,11 +1940,11 @@ static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
    return false;
  }
  
-static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
-                                                MVT &LocVT,
-                                                CCValAssign::LocInfo &LocInfo,
-                                                ISD::ArgFlagsTy &ArgFlags,
-                                                CCState &State) {
+bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+                                               MVT &LocVT,
+                                               CCValAssign::LocInfo &LocInfo,
+                                               ISD::ArgFlagsTy &ArgFlags,
+                                               CCState &State) {
    static const uint16_t ArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
@@ -1847,7 +1982,7 @@ static const uint16_t *GetFPR() {
  /// the stack.
  static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
                                         unsigned PtrByteSize) {
-  unsigned ArgSize = ArgVT.getSizeInBits()/8;
+  unsigned ArgSize = ArgVT.getStoreSize();
    if (Flags.isByVal())
      ArgSize = Flags.getByValSize();
    ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
@@ -1945,6 +2080,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
        switch (ValVT.getSimpleVT().SimpleTy) {
          default:
            llvm_unreachable("ValVT not supported by formal arguments Lowering");
+        case MVT::i1:
          case MVT::i32:
            RC = &PPC::GPRCRegClass;
            break;
@@ -1964,14 +2100,18 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
  
        // Transform the arguments stored in physical registers into virtual ones.
        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
-      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);
+      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
+                                            ValVT == MVT::i1 ? MVT::i32 : ValVT);
+
+      if (ValVT == MVT::i1)
+        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
  
        InVals.push_back(ArgValue);
      } else {
        // Argument stored in memory.
        assert(VA.isMemLoc());
  
-      unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
+      unsigned ArgSize = VA.getLocVT().getStoreSize();
        int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
                                        isImmutable);
  
@@ -2107,7 +2247,7 @@ PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
      ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
                           DAG.getValueType(ObjectVT));
  
-  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
+  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
  }
  
  // Set the size that is at least reserved in caller of this function.  Tail
@@ -2190,7 +2330,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
      SDValue ArgVal;
      bool needsLoad = false;
      EVT ObjectVT = Ins[ArgNo].VT;
-    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+    unsigned ObjSize = ObjectVT.getStoreSize();
      unsigned ArgSize = ObjSize;
      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
      std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
@@ -2233,6 +2373,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
          InVals.push_back(FIN);
          continue;
        }
+
+      unsigned BVAlign = Flags.getByValAlign();
+      if (BVAlign > 8) {
+        ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
+        CurArgOffset = ArgOffset;
+      }
+
        // All aggregates smaller than 8 bytes must be passed right-justified.
        if (ObjSize < PtrByteSize)
          CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
@@ -2251,7 +2398,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
              EVT ObjType = (ObjSize == 1 ? MVT::i8 :
                             (ObjSize == 2 ? MVT::i16 : MVT::i32));
              Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
-                                      MachinePointerInfo(FuncArg, CurArgOffset),
+                                      MachinePointerInfo(FuncArg),
                                        ObjType, false, false, 0);
            } else {
              // For sizes that don't fit a truncating store (3, 5, 6, 7),
@@ -2263,7 +2410,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
              int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
              SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
              Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
-                                 MachinePointerInfo(FuncArg, ArgOffset),
+                                 MachinePointerInfo(FuncArg),
                                   false, false, 0);
            }
  
@@ -2287,7 +2434,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
            SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
            SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
            SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
-                                       MachinePointerInfo(FuncArg, ArgOffset),
+                                       MachinePointerInfo(FuncArg, j),
                                         false, false, 0);
            MemOps.push_back(Store);
            ++GPR_idx;
@@ -2302,13 +2449,14 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
  
      switch (ObjectVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unhandled argument type!");
+    case MVT::i1:
      case MVT::i32:
      case MVT::i64:
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
  
-        if (ObjectVT == MVT::i32)
+        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
            // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
            // value to MVT::i64 and then truncate to the correct register size.
            ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
@@ -2499,6 +2647,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
  
        switch(ObjectVT.getSimpleVT().SimpleTy) {
        default: llvm_unreachable("Unhandled argument type!");
+      case MVT::i1:
        case MVT::i32:
        case MVT::f32:
          VecArgOffset += 4;
@@ -2583,8 +2732,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
            SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
            EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
            SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
-                                            MachinePointerInfo(FuncArg,
-                                              CurArgOffset),
+                                            MachinePointerInfo(FuncArg),
                                              ObjType, false, false, 0);
            MemOps.push_back(Store);
            ++GPR_idx;
@@ -2608,7 +2756,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
            SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
            SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
            SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
-                                       MachinePointerInfo(FuncArg, ArgOffset),
+                                       MachinePointerInfo(FuncArg, j),
                                         false, false, 0);
            MemOps.push_back(Store);
            ++GPR_idx;
@@ -2623,11 +2771,16 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
  
      switch (ObjectVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unhandled argument type!");
+    case MVT::i1:
      case MVT::i32:
        if (!isPPC64) {
          if (GPR_idx != Num_GPR_Regs) {
            unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
            ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+
+          if (ObjectVT == MVT::i1)
+            ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
+
            ++GPR_idx;
          } else {
            needsLoad = true;
@@ -2643,7 +2796,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
  
-        if (ObjectVT == MVT::i32)
+        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
            // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
            // value to MVT::i64 and then truncate to the correct register size.
            ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
@@ -2886,7 +3039,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
         if (Flags.isByVal()) return false;
      }
  
-    // Non PIC/GOT  tail calls are supported.
+    // Non-PIC/GOT tail calls are supported.
      if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
        return true;
  
@@ -2931,8 +3084,8 @@ struct TailCallArgumentInfo {
  static void
  StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
                                             SDValue Chain,
-                   const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
-                   SmallVector<SDValue, 8> &MemOpChains,
+                   const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
+                   SmallVectorImpl<SDValue> &MemOpChains,
                     SDLoc dl) {
    for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
      SDValue Arg = TailCallArgs[i].Arg;
@@ -2990,7 +3143,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
  static void
  CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
                           SDValue Arg, int SPDiff, unsigned ArgOffset,
-                      SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
+                     SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
    int Offset = ArgOffset + SPDiff;
    uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
    int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
@@ -3055,8 +3208,8 @@ static void
  LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
                   SDValue Arg, SDValue PtrOff, int SPDiff,
                   unsigned ArgOffset, bool isPPC64, bool isTailCall,
-                 bool isVector, SmallVector<SDValue, 8> &MemOpChains,
-                 SmallVector<TailCallArgumentInfo, 8> &TailCallArguments,
+                 bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
+                 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
                   SDLoc dl) {
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    if (!isTailCall) {
@@ -3080,7 +3233,7 @@ static
  void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
                       SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
                       SDValue LROp, SDValue FPOp, bool isDarwinABI,
-                     SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) {
+                     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
    MachineFunction &MF = DAG.getMachineFunction();
  
    // Emit a sequence of copyto/copyfrom virtual registers for arguments that
@@ -3100,15 +3253,15 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
  
    // Emit callseq_end just before tailcall node.
    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
-                             DAG.getIntPtrConstant(0, true), InFlag);
+                             DAG.getIntPtrConstant(0, true), InFlag, dl);
    InFlag = Chain.getValue(1);
  }
  
  static
  unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
                       SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
-                     SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
-                     SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys,
+                     SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
+                     SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
                       const PPCSubtarget &PPCSubTarget) {
  
    bool isPPC64 = PPCSubTarget.isPPC64();
@@ -3405,7 +3558,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
        // from allocating it), resulting in an additional register being
        // allocated and an unnecessary move instruction being generated.
        needsTOCRestore = true;
-    } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) {
+    } else if ((CallOpc == PPCISD::CALL) &&
+               (!isLocalCall(Callee) ||
+                DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
        // Otherwise insert NOP for non-local calls.
        CallOpc = PPCISD::CALL_NOP;
      }
@@ -3422,7 +3577,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
  
    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                               DAG.getIntPtrConstant(BytesCalleePops, true),
-                             InFlag);
+                             InFlag, dl);
    if (!Ins.empty())
      InFlag = Chain.getValue(1);
  
@@ -3434,10 +3589,10 @@ SDValue
  PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                               SmallVectorImpl<SDValue> &InVals) const {
    SelectionDAG &DAG                     = CLI.DAG;
-  SDLoc &dl                          = CLI.DL;
-  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
-  SmallVector<SDValue, 32> &OutVals     = CLI.OutVals;
-  SmallVector<ISD::InputArg, 32> &Ins   = CLI.Ins;
+  SDLoc &dl                             = CLI.DL;
+  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+  SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
+  SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
    SDValue Chain                         = CLI.Chain;
    SDValue Callee                        = CLI.Callee;
    bool &isTailCall                      = CLI.IsTailCall;
@@ -3557,7 +3712,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
  
    // Adjust the stack pointer for the new arguments...
    // These operations are automatically eliminated by the prolog/epilog pass
-  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                               dl);
    SDValue CallSeqStart = Chain;
  
    // Load the return address and frame pointer so it can be moved somewhere else
@@ -3608,7 +3764,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
  
        // This must go outside the CALLSEQ_START..END.
        SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
-                           CallSeqStart.getNode()->getOperand(1));
+                           CallSeqStart.getNode()->getOperand(1),
+                           SDLoc(MemcpyCall));
        DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                               NewCallSeqStart.getNode());
        Chain = CallSeqStart = NewCallSeqStart;
@@ -3620,6 +3777,9 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
      }
  
      if (VA.isRegLoc()) {
+      if (Arg.getValueType() == MVT::i1)
+        Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
+
        seenFloatArg |= VA.getLocVT().isFloatingPoint();
        // Put argument in a physical register.
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
@@ -3690,7 +3850,8 @@ PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
                          Flags, DAG, dl);
    // The MEMCPY must go outside the CALLSEQ_START..END.
    SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
-                             CallSeqStart.getNode()->getOperand(1));
+                             CallSeqStart.getNode()->getOperand(1),
+                             SDLoc(MemcpyCall));
    DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                           NewCallSeqStart.getNode());
    return NewCallSeqStart;
@@ -3744,7 +3905,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
  
    // Adjust the stack pointer for the new arguments...
    // These operations are automatically eliminated by the prolog/epilog pass
-  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                               dl);
    SDValue CallSeqStart = Chain;
  
    // Load the return address and frame pointer so it can be move somewhere else
@@ -3796,7 +3958,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
  
      // Promote integers to 64-bit values.
-    if (Arg.getValueType() == MVT::i32) {
+    if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
        // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
        unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
        Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
@@ -3818,6 +3980,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
        if (Size == 0)
          continue;
  
+      unsigned BVAlign = Flags.getByValAlign();
+      if (BVAlign > 8) {
+        if (BVAlign % PtrByteSize != 0)
+          llvm_unreachable(
+            "ByVal alignment is not a multiple of the pointer size");
+
+        ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
+      }
+
        // All aggregates smaller than 8 bytes must be passed right-justified.
        if (Size==1 || Size==2 || Size==4) {
          EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
@@ -3909,8 +4080,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
        continue;
      }
  
-    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+    switch (Arg.getSimpleValueType().SimpleTy) {
      default: llvm_unreachable("Unexpected ValueType for argument!");
+    case MVT::i1:
      case MVT::i32:
      case MVT::i64:
        if (GPR_idx != NumGPRs) {
@@ -3932,7 +4104,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
            // must be passed right-justified in the stack doubleword, and
            // in the GPR, if one is available.
            SDValue StoreOff;
-          if (Arg.getValueType().getSimpleVT().SimpleTy == MVT::f32) {
+          if (Arg.getSimpleValueType().SimpleTy == MVT::f32) {
              SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
              StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
            } else
@@ -4115,7 +4287,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
  
    // Adjust the stack pointer for the new arguments...
    // These operations are automatically eliminated by the prolog/epilog pass
-  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                               dl);
    SDValue CallSeqStart = Chain;
  
    // Load the return address and frame pointer so it can be move somewhere else
@@ -4239,11 +4412,15 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
        continue;
      }
  
-    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+    switch (Arg.getSimpleValueType().SimpleTy) {
      default: llvm_unreachable("Unexpected ValueType for argument!");
+    case MVT::i1:
      case MVT::i32:
      case MVT::i64:
        if (GPR_idx != NumGPRs) {
+        if (Arg.getValueType() == MVT::i1)
+          Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
+
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
        } else {
          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
@@ -4595,6 +4772,55 @@ SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                       Op.getOperand(0), Op.getOperand(1));
  }
  
+SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+  assert(Op.getValueType() == MVT::i1 &&
+         "Custom lowering only for i1 loads");
+
+  // First, load 8 bits into 32 bits, then truncate to 1 bit.
+
+  SDLoc dl(Op);
+  LoadSDNode *LD = cast<LoadSDNode>(Op);
+
+  SDValue Chain = LD->getChain();
+  SDValue BasePtr = LD->getBasePtr();
+  MachineMemOperand *MMO = LD->getMemOperand();
+
+  SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain,
+                                 BasePtr, MVT::i8, MMO);
+  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
+
+  SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
+  return DAG.getMergeValues(Ops, 2, dl);
+}
+
+SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
+         "Custom lowering only for i1 stores");
+
+  // First, zero extend to 32 bits, then use a truncating store to 8 bits.
+
+  SDLoc dl(Op);
+  StoreSDNode *ST = cast<StoreSDNode>(Op);
+
+  SDValue Chain = ST->getChain();
+  SDValue BasePtr = ST->getBasePtr();
+  SDValue Value = ST->getValue();
+  MachineMemOperand *MMO = ST->getMemOperand();
+
+  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value);
+  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
+}
+
+// FIXME: Remove this once the ANDI glue bug is fixed:
+SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
+  assert(Op.getValueType() == MVT::i1 &&
+         "Custom lowering only for i1 results");
+
+  SDLoc DL(Op);
+  return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
+                     Op.getOperand(0));
+}
+
  /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
  /// possible.
  SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
@@ -4704,7 +4930,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
  
    SDValue Tmp;
-  switch (Op.getValueType().getSimpleVT().SimpleTy) {
+  switch (Op.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
    case MVT::i32:
      Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
@@ -4761,6 +4987,11 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
    if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
      return SDValue();
  
+  if (Op.getOperand(0).getValueType() == MVT::i1)
+    return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
+                       DAG.getConstantFP(1.0, Op.getValueType()),
+                       DAG.getConstantFP(0.0, Op.getValueType()));
+
    assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) &&
           "UINT_TO_FP is supported only with FPCVT");
  
@@ -5551,7 +5782,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
  
    // Now that we have the comparison, emit a copy from the CR to a GPR.
    // This is flagged to the above dot comparison.
-  SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32,
+  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
                                  DAG.getRegister(PPC::CR6, MVT::i32),
                                  CompNode.getValue(1));
  
@@ -5684,6 +5915,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
    case ISD::VAARG:
      return LowerVAARG(Op, DAG, PPCSubTarget);
  
+  case ISD::VACOPY:
+    return LowerVACOPY(Op, DAG, PPCSubTarget);
+
    case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
    case ISD::DYNAMIC_STACKALLOC:
      return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
@@ -5691,6 +5925,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
    case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
    case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
  
+  case ISD::LOAD:               return LowerLOAD(Op, DAG);
+  case ISD::STORE:              return LowerSTORE(Op, DAG);
+  case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
    case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
    case ISD::FP_TO_UINT:
    case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
@@ -5779,6 +6016,9 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
      return;
    }
    case ISD::FP_TO_SINT:
+    // LowerFP_TO_INT() can only handle f32 and f64.
+    if (N->getOperand(0).getValueType() == MVT::ppcf128)
+      return;
      Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
      return;
    }
@@ -5811,8 +6051,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
    F->insert(It, loopMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
-                  llvm::next(MachineBasicBlock::iterator(MI)),
-                  BB->end());
+                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
  
    MachineRegisterInfo &RegInfo = F->getRegInfo();
@@ -5880,8 +6119,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
    F->insert(It, loopMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
-                  llvm::next(MachineBasicBlock::iterator(MI)),
-                  BB->end());
+                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
  
    MachineRegisterInfo &RegInfo = F->getRegInfo();
@@ -6033,7 +6271,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
  
    // Transfer the remainder of BB and its successor edges to sinkMBB.
    sinkMBB->splice(sinkMBB->begin(), MBB,
-                  llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
+                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
  
    // Note that the structure of the jmp_buf used here is not compatible
@@ -6050,6 +6288,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
    // thisMBB:
    const int64_t LabelOffset = 1 * PVT.getStoreSize();
    const int64_t TOCOffset   = 3 * PVT.getStoreSize();
+  const int64_t BPOffset    = 4 * PVT.getStoreSize();
  
    // Prepare IP either in reg.
    const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
@@ -6061,13 +6300,30 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
              .addReg(PPC::X2)
              .addImm(TOCOffset)
              .addReg(BufReg);
-
      MIB.setMemRefs(MMOBegin, MMOEnd);
    }
  
+  // Naked functions never have a base pointer, and so we use r1. For all
+  // other functions, this decision must be delayed until during PEI.
+  unsigned BaseReg;
+  if (MF->getFunction()->getAttributes().hasAttribute(
+          AttributeSet::FunctionIndex, Attribute::Naked))
+    BaseReg = PPCSubTarget.isPPC64() ? PPC::X1 : PPC::R1;
+  else
+    BaseReg = PPCSubTarget.isPPC64() ? PPC::BP8 : PPC::BP;
+
+  MIB = BuildMI(*thisMBB, MI, DL,
+                TII->get(PPCSubTarget.isPPC64() ? PPC::STD : PPC::STW))
+          .addReg(BaseReg)
+          .addImm(BPOffset)
+          .addReg(BufReg);
+  MIB.setMemRefs(MMOBegin, MMOEnd);
+
    // Setup
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
-  MIB.addRegMask(PPCRegInfo->getNoPreservedMask());
+  const PPCRegisterInfo *TRI =
+    static_cast<const PPCRegisterInfo*>(getTargetMachine().getRegisterInfo());
+  MIB.addRegMask(TRI->getNoPreservedMask());
  
    BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
  
@@ -6134,12 +6390,14 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
    // Since FP is only updated here but NOT referenced, it's treated as GPR.
    unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
    unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
+  unsigned BP  = (PVT == MVT::i64) ? PPC::X30 : PPC::R30;
  
    MachineInstrBuilder MIB;
  
    const int64_t LabelOffset = 1 * PVT.getStoreSize();
    const int64_t SPOffset    = 2 * PVT.getStoreSize();
    const int64_t TOCOffset   = 3 * PVT.getStoreSize();
+  const int64_t BPOffset    = 4 * PVT.getStoreSize();
  
    unsigned BufReg = MI->getOperand(0).getReg();
  
@@ -6181,8 +6439,17 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
    }
    MIB.setMemRefs(MMOBegin, MMOEnd);
  
-  // FIXME: When we also support base pointers, that register must also be
-  // restored here.
+  // Reload BP
+  if (PVT == MVT::i64) {
+    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
+            .addImm(BPOffset)
+            .addReg(BufReg);
+  } else {
+    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
+            .addImm(BPOffset)
+            .addReg(BufReg);
+  }
+  MIB.setMemRefs(MMOBegin, MMOEnd);
  
    // Reload TOC
    if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
@@ -6224,21 +6491,32 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
    MachineFunction *F = BB->getParent();
  
    if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
-                                 MI->getOpcode() == PPC::SELECT_CC_I8)) {
+                                 MI->getOpcode() == PPC::SELECT_CC_I8 ||
+                                 MI->getOpcode() == PPC::SELECT_I4 ||
+                                 MI->getOpcode() == PPC::SELECT_I8)) {
      SmallVector<MachineOperand, 2> Cond;
-    Cond.push_back(MI->getOperand(4));
+    if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+        MI->getOpcode() == PPC::SELECT_CC_I8)
+      Cond.push_back(MI->getOperand(4));
+    else
+      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
      Cond.push_back(MI->getOperand(1));
  
      DebugLoc dl = MI->getDebugLoc();
-    PPCII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), Cond,
-                        MI->getOperand(2).getReg(), MI->getOperand(3).getReg());
+    const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+    TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
+                      Cond, MI->getOperand(2).getReg(),
+                      MI->getOperand(3).getReg());
    } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
               MI->getOpcode() == PPC::SELECT_CC_I8 ||
               MI->getOpcode() == PPC::SELECT_CC_F4 ||
               MI->getOpcode() == PPC::SELECT_CC_F8 ||
-             MI->getOpcode() == PPC::SELECT_CC_VRRC) {
-
-
+             MI->getOpcode() == PPC::SELECT_CC_VRRC ||
+             MI->getOpcode() == PPC::SELECT_I4 ||
+             MI->getOpcode() == PPC::SELECT_I8 ||
+             MI->getOpcode() == PPC::SELECT_F4 ||
+             MI->getOpcode() == PPC::SELECT_F8 ||
+             MI->getOpcode() == PPC::SELECT_VRRC) {
      // The incoming instruction knows the destination vreg to set, the
      // condition code register to branch on, the true/false values to
      // select between, and a branch opcode to use.
@@ -6252,23 +6530,31 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
      MachineBasicBlock *thisMBB = BB;
      MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
      MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
-    unsigned SelectPred = MI->getOperand(4).getImm();
      DebugLoc dl = MI->getDebugLoc();
      F->insert(It, copy0MBB);
      F->insert(It, sinkMBB);
  
      // Transfer the remainder of BB and its successor edges to sinkMBB.
      sinkMBB->splice(sinkMBB->begin(), BB,
-                    llvm::next(MachineBasicBlock::iterator(MI)),
-                    BB->end());
+                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
      sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
  
      // Next, add the true and fallthrough blocks as its successors.
      BB->addSuccessor(copy0MBB);
      BB->addSuccessor(sinkMBB);
  
-    BuildMI(BB, dl, TII->get(PPC::BCC))
-      .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+    if (MI->getOpcode() == PPC::SELECT_I4 ||
+        MI->getOpcode() == PPC::SELECT_I8 ||
+        MI->getOpcode() == PPC::SELECT_F4 ||
+        MI->getOpcode() == PPC::SELECT_F8 ||
+        MI->getOpcode() == PPC::SELECT_VRRC) {
+      BuildMI(BB, dl, TII->get(PPC::BC))
+        .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+    } else {
+      unsigned SelectPred = MI->getOperand(4).getImm();
+      BuildMI(BB, dl, TII->get(PPC::BCC))
+        .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+    }
  
      //  copy0MBB:
      //   %FalseValue = ...
@@ -6370,8 +6656,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
      F->insert(It, midMBB);
      F->insert(It, exitMBB);
      exitMBB->splice(exitMBB->begin(), BB,
-                    llvm::next(MachineBasicBlock::iterator(MI)),
-                    BB->end());
+                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
      exitMBB->transferSuccessorsAndUpdatePHIs(BB);
  
      //  thisMBB:
@@ -6441,8 +6726,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
      F->insert(It, midMBB);
      F->insert(It, exitMBB);
      exitMBB->splice(exitMBB->begin(), BB,
-                    llvm::next(MachineBasicBlock::iterator(MI)),
-                    BB->end());
+                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
      exitMBB->transferSuccessorsAndUpdatePHIs(BB);
  
      MachineRegisterInfo &RegInfo = F->getRegInfo();
@@ -6591,51 +6875,27 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
  
      // Restore FPSCR value.
      BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
-  } else if (MI->getOpcode() == PPC::FRINDrint ||
-             MI->getOpcode() == PPC::FRINSrint) {
-    bool isf32 = MI->getOpcode() == PPC::FRINSrint;
-    unsigned Dest = MI->getOperand(0).getReg();
-    unsigned Src = MI->getOperand(1).getReg();
-    DebugLoc dl   = MI->getDebugLoc();
+  } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
+             MI->getOpcode() == PPC::ANDIo_1_GT_BIT ||
+             MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
+             MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) {
+    unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
+                       MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ?
+                      PPC::ANDIo8 : PPC::ANDIo;
+    bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
+                 MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8);
  
      MachineRegisterInfo &RegInfo = F->getRegInfo();
-    unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
+    unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
+                                                  &PPC::GPRCRegClass :
+                                                  &PPC::G8RCRegClass);
  
-    // Perform the rounding.
-    BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest)
-      .addReg(Src);
-
-    // Compare the results.
-    BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg)
-      .addReg(Dest).addReg(Src);
-
-    // If the results were not equal, then set the FPSCR XX bit.
-    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
-    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
-    F->insert(It, midMBB);
-    F->insert(It, exitMBB);
-    exitMBB->splice(exitMBB->begin(), BB,
-                    llvm::next(MachineBasicBlock::iterator(MI)),
-                    BB->end());
-    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
-    BuildMI(*BB, MI, dl, TII->get(PPC::BCC))
-      .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB);
-
-    BB->addSuccessor(midMBB);
-    BB->addSuccessor(exitMBB);
-
-    BB = midMBB;
-
-    // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set
-    // the FI bit here because that will not automatically set XX also,
-    // and XX is what libm interprets as the FE_INEXACT flag.
-    BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6);
-    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
-
-    BB->addSuccessor(exitMBB);
-
-    BB = exitMBB;
+    DebugLoc dl   = MI->getDebugLoc();
+    BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
+      .addReg(MI->getOperand(1).getReg()).addImm(1);
+    BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
+            MI->getOperand(0).getReg())
+      .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
    } else {
      llvm_unreachable("Unexpected instr type to insert");
    }
@@ -6781,6 +7041,645 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
    return SDValue();
  }
  
+// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
+// not enforce equality of the chain operands.
+static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
+                            unsigned Bytes, int Dist,
+                            SelectionDAG &DAG) {
+  EVT VT = LS->getMemoryVT();
+  if (VT.getSizeInBits() / 8 != Bytes)
+    return false;
+
+  SDValue Loc = LS->getBasePtr();
+  SDValue BaseLoc = Base->getBasePtr();
+  if (Loc.getOpcode() == ISD::FrameIndex) {
+    if (BaseLoc.getOpcode() != ISD::FrameIndex)
+      return false;
+    const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
+    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+    int FS  = MFI->getObjectSize(FI);
+    int BFS = MFI->getObjectSize(BFI);
+    if (FS != BFS || FS != (int)Bytes) return false;
+    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+  }
+
+  // Handle X+C
+  if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
+      cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
+    return true;
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  const GlobalValue *GV1 = NULL;
+  const GlobalValue *GV2 = NULL;
+  int64_t Offset1 = 0;
+  int64_t Offset2 = 0;
+  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+  if (isGA1 && isGA2 && GV1 == GV2)
+    return Offset1 == (Offset2 + Dist*Bytes);
+  return false;
+}
+
+// Return true is there is a nearyby consecutive load to the one provided
+// (regardless of alignment). We search up and down the chain, looking though
+// token factors and other loads (but nothing else). As a result, a true
+// results indicates that it is safe to create a new consecutive load adjacent
+// to the load provided.
+static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
+  SDValue Chain = LD->getChain();
+  EVT VT = LD->getMemoryVT();
+
+  SmallSet<SDNode *, 16> LoadRoots;
+  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
+  SmallSet<SDNode *, 16> Visited;
+
+  // First, search up the chain, branching to follow all token-factor operands.
+  // If we find a consecutive load, then we're done, otherwise, record all
+  // nodes just above the top-level loads and token factors.
+  while (!Queue.empty()) {
+    SDNode *ChainNext = Queue.pop_back_val();
+    if (!Visited.insert(ChainNext))
+      continue;
+
+    if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) {
+      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
+        return true;
+
+      if (!Visited.count(ChainLD->getChain().getNode()))
+        Queue.push_back(ChainLD->getChain().getNode());
+    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
+      for (SDNode::op_iterator O = ChainNext->op_begin(),
+           OE = ChainNext->op_end(); O != OE; ++O)
+        if (!Visited.count(O->getNode()))
+          Queue.push_back(O->getNode());
+    } else
+      LoadRoots.insert(ChainNext);
+  }
+
+  // Second, search down the chain, starting from the top-level nodes recorded
+  // in the first phase. These top-level nodes are the nodes just above all
+  // loads and token factors. Starting with their uses, recursively look though
+  // all loads (just the chain uses) and token factors to find a consecutive
+  // load.
+  Visited.clear();
+  Queue.clear();
+
+  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
+       IE = LoadRoots.end(); I != IE; ++I) {
+    Queue.push_back(*I);
+       
+    while (!Queue.empty()) {
+      SDNode *LoadRoot = Queue.pop_back_val();
+      if (!Visited.insert(LoadRoot))
+        continue;
+
+      if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot))
+        if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
+          return true;
+
+      for (SDNode::use_iterator UI = LoadRoot->use_begin(),
+           UE = LoadRoot->use_end(); UI != UE; ++UI)
+        if (((isa<LoadSDNode>(*UI) &&
+            cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
+            UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
+          Queue.push_back(*UI);
+    }
+  }
+
+  return false;
+}
+
+SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
+                                                  DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc dl(N);
+
+  assert(PPCSubTarget.useCRBits() &&
+         "Expecting to be tracking CR bits");
+  // If we're tracking CR bits, we need to be careful that we don't have:
+  //   trunc(binary-ops(zext(x), zext(y)))
+  // or
+  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
+  // such that we're unnecessarily moving things into GPRs when it would be
+  // better to keep them in CR bits.
+
+  // Note that trunc here can be an actual i1 trunc, or can be the effective
+  // truncation that comes from a setcc or select_cc.
+  if (N->getOpcode() == ISD::TRUNCATE &&
+      N->getValueType(0) != MVT::i1)
+    return SDValue();
+
+  if (N->getOperand(0).getValueType() != MVT::i32 &&
+      N->getOperand(0).getValueType() != MVT::i64)
+    return SDValue();
+
+  if (N->getOpcode() == ISD::SETCC ||
+      N->getOpcode() == ISD::SELECT_CC) {
+    // If we're looking at a comparison, then we need to make sure that the
+    // high bits (all except for the first) don't matter the result.
+    ISD::CondCode CC =
+      cast<CondCodeSDNode>(N->getOperand(
+        N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
+    unsigned OpBits = N->getOperand(0).getValueSizeInBits();
+
+    if (ISD::isSignedIntSetCC(CC)) {
+      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
+          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
+        return SDValue();
+    } else if (ISD::isUnsignedIntSetCC(CC)) {
+      if (!DAG.MaskedValueIsZero(N->getOperand(0),
+                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
+          !DAG.MaskedValueIsZero(N->getOperand(1),
+                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
+        return SDValue();
+    } else {
+      // This is neither a signed nor an unsigned comparison, just make sure
+      // that the high bits are equal.
+      APInt Op1Zero, Op1One;
+      APInt Op2Zero, Op2One;
+      DAG.ComputeMaskedBits(N->getOperand(0), Op1Zero, Op1One);
+      DAG.ComputeMaskedBits(N->getOperand(1), Op2Zero, Op2One);
+
+      // We don't really care about what is known about the first bit (if
+      // anything), so clear it in all masks prior to comparing them.
+      Op1Zero.clearBit(0); Op1One.clearBit(0);
+      Op2Zero.clearBit(0); Op2One.clearBit(0);
+
+      if (Op1Zero != Op2Zero || Op1One != Op2One)
+        return SDValue();
+    }
+  }
+
+  // We now know that the higher-order bits are irrelevant, we just need to
+  // make sure that all of the intermediate operations are bit operations, and
+  // all inputs are extensions.
+  if (N->getOperand(0).getOpcode() != ISD::AND &&
+      N->getOperand(0).getOpcode() != ISD::OR  &&
+      N->getOperand(0).getOpcode() != ISD::XOR &&
+      N->getOperand(0).getOpcode() != ISD::SELECT &&
+      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
+      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
+      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
+      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
+      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
+    return SDValue();
+
+  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
+      N->getOperand(1).getOpcode() != ISD::AND &&
+      N->getOperand(1).getOpcode() != ISD::OR  &&
+      N->getOperand(1).getOpcode() != ISD::XOR &&
+      N->getOperand(1).getOpcode() != ISD::SELECT &&
+      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
+      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
+      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
+      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
+      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
+    return SDValue();
+
+  SmallVector<SDValue, 4> Inputs;
+  SmallVector<SDValue, 8> BinOps, PromOps;
+  SmallPtrSet<SDNode *, 16> Visited;
+
+  for (unsigned i = 0; i < 2; ++i) {
+    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
+          N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
+        isa<ConstantSDNode>(N->getOperand(i)))
+      Inputs.push_back(N->getOperand(i));
+    else
+      BinOps.push_back(N->getOperand(i));
+
+    if (N->getOpcode() == ISD::TRUNCATE)
+      break;
+  }
+
+  // Visit all inputs, collect all binary operations (and, or, xor and
+  // select) that are all fed by extensions. 
+  while (!BinOps.empty()) {
+    SDValue BinOp = BinOps.back();
+    BinOps.pop_back();
+
+    if (!Visited.insert(BinOp.getNode()))
+      continue;
+
+    PromOps.push_back(BinOp);
+
+    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
+      // The condition of the select is not promoted.
+      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
+        continue;
+      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
+        continue;
+
+      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
+           BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
+          isa<ConstantSDNode>(BinOp.getOperand(i))) {
+        Inputs.push_back(BinOp.getOperand(i)); 
+      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
+                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
+                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
+                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
+                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
+                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
+                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+                 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
+        BinOps.push_back(BinOp.getOperand(i));
+      } else {
+        // We have an input that is not an extension or another binary
+        // operation; we'll abort this transformation.
+        return SDValue();
+      }
+    }
+  }
+
+  // Make sure that this is a self-contained cluster of operations (which
+  // is not quite the same thing as saying that everything has only one
+  // use).
+  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+    if (isa<ConstantSDNode>(Inputs[i]))
+      continue;
+
+    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
+                              UE = Inputs[i].getNode()->use_end();
+         UI != UE; ++UI) {
+      SDNode *User = *UI;
+      if (User != N && !Visited.count(User))
+        return SDValue();
+
+      // Make sure that we're not going to promote the non-output-value
+      // operand(s) or SELECT or SELECT_CC.
+      // FIXME: Although we could sometimes handle this, and it does occur in
+      // practice that one of the condition inputs to the select is also one of
+      // the outputs, we currently can't deal with this.
+      if (User->getOpcode() == ISD::SELECT) {
+        if (User->getOperand(0) == Inputs[i])
+          return SDValue();
+      } else if (User->getOpcode() == ISD::SELECT_CC) {
+        if (User->getOperand(0) == Inputs[i] ||
+            User->getOperand(1) == Inputs[i])
+          return SDValue();
+      }
+    }
+  }
+
+  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
+    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
+                              UE = PromOps[i].getNode()->use_end();
+         UI != UE; ++UI) {
+      SDNode *User = *UI;
+      if (User != N && !Visited.count(User))
+        return SDValue();
+
+      // Make sure that we're not going to promote the non-output-value
+      // operand(s) or SELECT or SELECT_CC.
+      // FIXME: Although we could sometimes handle this, and it does occur in
+      // practice that one of the condition inputs to the select is also one of
+      // the outputs, we currently can't deal with this.
+      if (User->getOpcode() == ISD::SELECT) {
+        if (User->getOperand(0) == PromOps[i])
+          return SDValue();
+      } else if (User->getOpcode() == ISD::SELECT_CC) {
+        if (User->getOperand(0) == PromOps[i] ||
+            User->getOperand(1) == PromOps[i])
+          return SDValue();
+      }
+    }
+  }
+
+  // Replace all inputs with the extension operand.
+  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+    // Constants may have users outside the cluster of to-be-promoted nodes,
+    // and so we need to replace those as we do the promotions.
+    if (isa<ConstantSDNode>(Inputs[i]))
+      continue;
+    else
+      DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); 
+  }
+
+  // Replace all operations (these are all the same, but have a different
+  // (i1) return type). DAG.getNode will validate that the types of
+  // a binary operator match, so go through the list in reverse so that
+  // we've likely promoted both operands first. Any intermediate truncations or
+  // extensions disappear.
+  while (!PromOps.empty()) {
+    SDValue PromOp = PromOps.back();
+    PromOps.pop_back();
+
+    if (PromOp.getOpcode() == ISD::TRUNCATE ||
+        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
+        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
+        PromOp.getOpcode() == ISD::ANY_EXTEND) {
+      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
+          PromOp.getOperand(0).getValueType() != MVT::i1) {
+        // The operand is not yet ready (see comment below).
+        PromOps.insert(PromOps.begin(), PromOp);
+        continue;
+      }
+
+      SDValue RepValue = PromOp.getOperand(0);
+      if (isa<ConstantSDNode>(RepValue))
+        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
+
+      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
+      continue;
+    }
+
+    unsigned C;
+    switch (PromOp.getOpcode()) {
+    default:             C = 0; break;
+    case ISD::SELECT:    C = 1; break;
+    case ISD::SELECT_CC: C = 2; break;
+    }
+
+    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
+         PromOp.getOperand(C).getValueType() != MVT::i1) ||
+        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
+         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
+      // The to-be-promoted operands of this node have not yet been
+      // promoted (this should be rare because we're going through the
+      // list backward, but if one of the operands has several users in
+      // this cluster of to-be-promoted nodes, it is possible).
+      PromOps.insert(PromOps.begin(), PromOp);
+      continue;
+    }
+
+    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
+                                PromOp.getNode()->op_end());
+
+    // If there are any constant inputs, make sure they're replaced now.
+    for (unsigned i = 0; i < 2; ++i)
+      if (isa<ConstantSDNode>(Ops[C+i]))
+        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
+
+    DAG.ReplaceAllUsesOfValueWith(PromOp,
+      DAG.getNode(PromOp.getOpcode(), dl, MVT::i1,
+                  Ops.data(), Ops.size()));
+  }
+
+  // Now we're left with the initial truncation itself.
+  if (N->getOpcode() == ISD::TRUNCATE)
+    return N->getOperand(0);
+
+  // Otherwise, this is a comparison. The operands to be compared have just
+  // changed type (to i1), but everything else is the same.
+  return SDValue(N, 0);
+}
+
+SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
+                                                  DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc dl(N);
+
+  // If we're tracking CR bits, we need to be careful that we don't have:
+  //   zext(binary-ops(trunc(x), trunc(y)))
+  // or
+  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
+  // such that we're unnecessarily moving things into CR bits that can more
+  // efficiently stay in GPRs. Note that if we're not certain that the high
+  // bits are set as required by the final extension, we still may need to do
+  // some masking to get the proper behavior.
+
+  // This same functionality is important on PPC64 when dealing with
+  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
+  // the return values of functions. Because it is so similar, it is handled
+  // here as well.
+
+  if (N->getValueType(0) != MVT::i32 &&
+      N->getValueType(0) != MVT::i64)
+    return SDValue();
+
+  if (!((N->getOperand(0).getValueType() == MVT::i1 &&
+        PPCSubTarget.useCRBits()) ||
+       (N->getOperand(0).getValueType() == MVT::i32 &&
+        PPCSubTarget.isPPC64())))
+    return SDValue();
+
+  if (N->getOperand(0).getOpcode() != ISD::AND &&
+      N->getOperand(0).getOpcode() != ISD::OR  &&
+      N->getOperand(0).getOpcode() != ISD::XOR &&
+      N->getOperand(0).getOpcode() != ISD::SELECT &&
+      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
+    return SDValue();
+
+  SmallVector<SDValue, 4> Inputs;
+  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
+  SmallPtrSet<SDNode *, 16> Visited;
+
+  // Visit all inputs, collect all binary operations (and, or, xor and
+  // select) that are all fed by truncations. 
+  while (!BinOps.empty()) {
+    SDValue BinOp = BinOps.back();
+    BinOps.pop_back();
+
+    if (!Visited.insert(BinOp.getNode()))
+      continue;
+
+    PromOps.push_back(BinOp);
+
+    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
+      // The condition of the select is not promoted.
+      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
+        continue;
+      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
+        continue;
+
+      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
+          isa<ConstantSDNode>(BinOp.getOperand(i))) {
+        Inputs.push_back(BinOp.getOperand(i)); 
+      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
+                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
+                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
+                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
+                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
+        BinOps.push_back(BinOp.getOperand(i));
+      } else {
+        // We have an input that is not a truncation or another binary
+        // operation; we'll abort this transformation.
+        return SDValue();
+      }
+    }
+  }
+
+  // Make sure that this is a self-contained cluster of operations (which
+  // is not quite the same thing as saying that everything has only one
+  // use).
+  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+    if (isa<ConstantSDNode>(Inputs[i]))
+      continue;
+
+    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
+                              UE = Inputs[i].getNode()->use_end();
+         UI != UE; ++UI) {
+      SDNode *User = *UI;
+      if (User != N && !Visited.count(User))
+        return SDValue();
+
+      // Make sure that we're not going to promote the non-output-value
+      // operand(s) or SELECT or SELECT_CC.
+      // FIXME: Although we could sometimes handle this, and it does occur in
+      // practice that one of the condition inputs to the select is also one of
+      // the outputs, we currently can't deal with this.
+      if (User->getOpcode() == ISD::SELECT) {
+        if (User->getOperand(0) == Inputs[i])
+          return SDValue();
+      } else if (User->getOpcode() == ISD::SELECT_CC) {
+        if (User->getOperand(0) == Inputs[i] ||
+            User->getOperand(1) == Inputs[i])
+          return SDValue();
+      }
+    }
+  }
+
+  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
+    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
+                              UE = PromOps[i].getNode()->use_end();
+         UI != UE; ++UI) {
+      SDNode *User = *UI;
+      if (User != N && !Visited.count(User))
+        return SDValue();
+
+      // Make sure that we're not going to promote the non-output-value
+      // operand(s) or SELECT or SELECT_CC.
+      // FIXME: Although we could sometimes handle this, and it does occur in
+      // practice that one of the condition inputs to the select is also one of
+      // the outputs, we currently can't deal with this.
+      if (User->getOpcode() == ISD::SELECT) {
+        if (User->getOperand(0) == PromOps[i])
+          return SDValue();
+      } else if (User->getOpcode() == ISD::SELECT_CC) {
+        if (User->getOperand(0) == PromOps[i] ||
+            User->getOperand(1) == PromOps[i])
+          return SDValue();
+      }
+    }
+  }
+
+  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
+  bool ReallyNeedsExt = false;
+  if (N->getOpcode() != ISD::ANY_EXTEND) {
+    // If all of the inputs are not already sign/zero extended, then
+    // we'll still need to do that at the end.
+    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+      if (isa<ConstantSDNode>(Inputs[i]))
+        continue;
+
+      unsigned OpBits =
+        Inputs[i].getOperand(0).getValueSizeInBits();
+      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
+
+      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
+           !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
+                                  APInt::getHighBitsSet(OpBits,
+                                                        OpBits-PromBits))) ||
+          (N->getOpcode() == ISD::SIGN_EXTEND &&
+           DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
+             (OpBits-(PromBits-1)))) {
+        ReallyNeedsExt = true;
+        break;
+      }
+    }
+  }
+
+  // Replace all inputs, either with the truncation operand, or a
+  // truncation or extension to the final output type.
+  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+    // Constant inputs need to be replaced with the to-be-promoted nodes that
+    // use them because they might have users outside of the cluster of
+    // promoted nodes.
+    if (isa<ConstantSDNode>(Inputs[i]))
+      continue;
+
+    SDValue InSrc = Inputs[i].getOperand(0);
+    if (Inputs[i].getValueType() == N->getValueType(0))
+      DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
+    else if (N->getOpcode() == ISD::SIGN_EXTEND)
+      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+        DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
+    else if (N->getOpcode() == ISD::ZERO_EXTEND)
+      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+        DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
+    else
+      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+        DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
+  }
+
+  // Replace all operations (these are all the same, but have a different
+  // (promoted) return type). DAG.getNode will validate that the types of
+  // a binary operator match, so go through the list in reverse so that
+  // we've likely promoted both operands first.
+  while (!PromOps.empty()) {
+    SDValue PromOp = PromOps.back();
+    PromOps.pop_back();
+
+    unsigned C;
+    switch (PromOp.getOpcode()) {
+    default:             C = 0; break;
+    case ISD::SELECT:    C = 1; break;
+    case ISD::SELECT_CC: C = 2; break;
+    }
+
+    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
+         PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
+        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
+         PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
+      // The to-be-promoted operands of this node have not yet been
+      // promoted (this should be rare because we're going through the
+      // list backward, but if one of the operands has several users in
+      // this cluster of to-be-promoted nodes, it is possible).
+      PromOps.insert(PromOps.begin(), PromOp);
+      continue;
+    }
+
+    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
+                                PromOp.getNode()->op_end());
+
+    // If this node has constant inputs, then they'll need to be promoted here.
+    for (unsigned i = 0; i < 2; ++i) {
+      if (!isa<ConstantSDNode>(Ops[C+i]))
+        continue;
+      if (Ops[C+i].getValueType() == N->getValueType(0))
+        continue;
+
+      if (N->getOpcode() == ISD::SIGN_EXTEND)
+        Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+      else if (N->getOpcode() == ISD::ZERO_EXTEND)
+        Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+      else
+        Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+    }
+
+    DAG.ReplaceAllUsesOfValueWith(PromOp,
+      DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0),
+                  Ops.data(), Ops.size()));
+  }
+
+  // Now we're left with the initial extension itself.
+  if (!ReallyNeedsExt)
+    return N->getOperand(0);
+
+  // To zero extend, just mask off everything except for the first bit (in the
+  // i1 case).
+  if (N->getOpcode() == ISD::ZERO_EXTEND)
+    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
+                       DAG.getConstant(APInt::getLowBitsSet(
+                                         N->getValueSizeInBits(0), PromBits),
+                                       N->getValueType(0)));
+
+  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
+         "Invalid extension type");
+  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0));
+  SDValue ShiftCst =
+    DAG.getConstant(N->getValueSizeInBits(0)-PromBits, ShiftAmountTy);
+  return DAG.getNode(ISD::SRA, dl, N->getValueType(0), 
+                     DAG.getNode(ISD::SHL, dl, N->getValueType(0),
+                                 N->getOperand(0), ShiftCst), ShiftCst);
+}
+
  SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
    const TargetMachine &TM = getTargetMachine();
@@ -6807,6 +7706,14 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
          return N->getOperand(0);
      }
      break;
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND: 
+    return DAGCombineExtBoolTrunc(N, DCI);
+  case ISD::TRUNCATE:
+  case ISD::SETCC:
+  case ISD::SELECT_CC:
+    return DAGCombineTruncBoolExt(N, DCI);
    case ISD::FDIV: {
      assert(TM.Options.UnsafeFPMath &&
             "Reciprocal estimates require UnsafeFPMath");
@@ -6867,8 +7774,28 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
      if (RV.getNode() != 0) {
        DCI.AddToWorklist(RV.getNode());
        RV = DAGCombineFastRecip(RV, DCI);
-      if (RV.getNode() != 0)
+      if (RV.getNode() != 0) {
+       // Unfortunately, RV is now NaN if the input was exactly 0. Select out
+       // this case and force the answer to 0.
+
+        EVT VT = RV.getValueType();
+
+        SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType());
+        if (VT.isVector()) {
+          assert(VT.getVectorNumElements() == 4 && "Unknown vector type");
+          Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero);
+        }
+
+        SDValue ZeroCmp =
+          DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT),
+                       N->getOperand(0), Zero, ISD::SETEQ);
+        DCI.AddToWorklist(ZeroCmp.getNode());
+        DCI.AddToWorklist(RV.getNode());
+
+        RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT,
+                         ZeroCmp, Zero, RV);
          return RV;
+      }
      }
  
      }
@@ -6964,7 +7891,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
      unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
      if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
          TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
-        DCI.getDAGCombineLevel() == AfterLegalizeTypes &&
+        (VT == MVT::v16i8 || VT == MVT::v8i16 ||
+         VT == MVT::v4i32 || VT == MVT::v4f32) &&
          LD->getAlignment() < ABIAlignment) {
        // This is a type-legal unaligned Altivec load.
        SDValue Chain = LD->getChain();
@@ -6984,7 +7912,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
        // you might suspect (sizeof(vector) bytes after the last requested
        // load), but rather sizeof(vector) - 1 bytes after the last
        // requested vector. The point of this is to avoid a page fault if the
-      // base address happend to be aligned. This works because if the base
+      // base address happened to be aligned. This works because if the base
        // address is aligned, then adding less than a full vector length will
        // cause the last vector in the sequence to be (re)loaded. Otherwise,
        // the next vector will be fetched as you might suspect was necessary.
@@ -7015,12 +7943,19 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
        // require the next load to appear to be aligned, even though it
        // is actually offset from the base pointer by a lesser amount.
        int IncOffset = VT.getSizeInBits() / 8;
-      int IncValue = IncOffset - 1;
+      int IncValue = IncOffset;
+
+      // Walk (both up and down) the chain looking for another load at the real
+      // (aligned) offset (the alignment of the other load does not matter in
+      // this case). If found, then do not use the offset reduction trick, as
+      // that will prevent the loads from being later combined (as they would
+      // otherwise be duplicates).
+      if (!findConsecutiveLoad(LD, DAG))
+        --IncValue;
+
        SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
        Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
  
-      // FIXME: We might have another load (with a slightly-different
-      // real offset) that we can reuse here.
        SDValue ExtraLoad =
          DAG.getLoad(VT, dl, Chain, Ptr,
                      LD->getPointerInfo().getWithOffset(IncOffset),
@@ -7101,6 +8036,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
          }
        }
      }
+
+    break;
    case ISD::BSWAP:
      // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
      if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
@@ -7185,16 +8122,35 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
          }
        }
  
-      // If the user is a MFCR instruction, we know this is safe.  Otherwise we
-      // give up for right now.
-      if (FlagUser->getOpcode() == PPCISD::MFCR)
+      // If the user is a MFOCRF instruction, we know this is safe.
+      // Otherwise we give up for right now.
+      if (FlagUser->getOpcode() == PPCISD::MFOCRF)
          return SDValue(VCMPoNode, 0);
      }
      break;
    }
+  case ISD::BRCOND: {
+    SDValue Cond = N->getOperand(1);
+    SDValue Target = N->getOperand(2);
+ 
+    if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+        cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
+          Intrinsic::ppc_is_decremented_ctr_nonzero) {
+
+      // We now need to make the intrinsic dead (it cannot be instruction
+      // selected).
+      DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
+      assert(Cond.getNode()->hasOneUse() &&
+             "Counter decrement has more than one use");
+
+      return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
+                         N->getOperand(0), Target);
+    }
+  }
+  break;
    case ISD::BR_CC: {
      // If this is a branch on an altivec predicate comparison, lower this so
-    // that we don't have to do a MFCR: instead, branch directly on CR6.  This
+    // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
      // lowering is done pre-legalize, because the legalizer lowers the predicate
      // compare down to code that is difficult to reassemble.
      ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
@@ -7356,6 +8312,8 @@ PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
        // suboptimal.
        return C_Memory;
      }
+  } else if (Constraint == "wc") { // individual CR bits.
+    return C_RegisterClass;
    }
    return TargetLowering::getConstraintType(Constraint);
  }
@@ -7373,7 +8331,11 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
    if (CallOperandVal == NULL)
      return CW_Default;
    Type *type = CallOperandVal->getType();
+
    // Look at the constraint type.
+  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
+    return CW_Register; // an individual CR bit.
+
    switch (*constraint) {
    default:
      weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
@@ -7406,7 +8368,7 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
  
  std::pair<unsigned, const TargetRegisterClass*>
  PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
-                                                EVT VT) const {
+                                                MVT VT) const {
    if (Constraint.size() == 1) {
      // GCC RS6000 Constraint Letters
      switch (Constraint[0]) {
@@ -7429,9 +8391,28 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
      case 'y':   // crrc
        return std::make_pair(0U, &PPC::CRRCRegClass);
      }
+  } else if (Constraint == "wc") { // an individual CR bit.
+    return std::make_pair(0U, &PPC::CRBITRCRegClass);
+  }
+
+  std::pair<unsigned, const TargetRegisterClass*> R =
+    TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+
+  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
+  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
+  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
+  // register.
+  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
+  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
+  if (R.first && VT == MVT::i64 && PPCSubTarget.isPPC64() &&
+      PPC::GPRCRegClass.contains(R.first)) {
+    const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+    return std::make_pair(TRI->getMatchingSuperReg(R.first,
+                            PPC::sub_32, &PPC::G8RCRegClass),
+                          &PPC::G8RCRegClass);
    }
  
-  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+  return R;
  }
  
  
@@ -7547,6 +8528,9 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
    MachineFrameInfo *MFI = MF.getFrameInfo();
    MFI->setReturnAddressIsTaken(true);
  
+  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+    return SDValue();
+
    SDLoc dl(Op);
    unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  
@@ -7635,6 +8619,7 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
  }
  
  bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+                                                      unsigned,
                                                        bool *Fast) const {
    if (DisablePPCUnaligned)
      return false;
@@ -7660,18 +8645,15 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
    return true;
  }
  
-/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
-/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
-/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
-/// is expanded to mul + add.
-bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
+bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+  VT = VT.getScalarType();
+
    if (!VT.isSimple())
      return false;
  
    switch (VT.getSimpleVT().SimpleTy) {
    case MVT::f32:
    case MVT::f64:
-  case MVT::v4f32:
      return true;
    default:
      break;
@@ -7681,9 +8663,15 @@ bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
  }
  
  Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
-  if (DisableILPPref)
+  if (DisableILPPref || PPCSubTarget.enableMachineScheduler())
      return TargetLowering::getSchedulingPreference(N);
  
    return Sched::ILP;
  }
  
+// Create a fast isel object.
+FastISel *
+PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
+                                  const TargetLibraryInfo *LibInfo) const {
+  return PPC::createFastISel(FuncInfo, LibInfo);
+}