Revert r185595-185596 which broke buildbots.

[oota-llvm.git] / lib / Target / PowerPC / PPCISelLowering.cpp
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index b0a684eee1a28cc94436295e4b424bcd74ac1fd9..db49e213e91e6983a8eaf3761c394188ab138670 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -37,21 +37,6 @@
  #include "llvm/Target/TargetOptions.h"
  using namespace llvm;
  
-static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                       CCValAssign::LocInfo &LocInfo,
-                                       ISD::ArgFlagsTy &ArgFlags,
-                                       CCState &State);
-static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
-                                              MVT &LocVT,
-                                              CCValAssign::LocInfo &LocInfo,
-                                              ISD::ArgFlagsTy &ArgFlags,
-                                              CCState &State);
-static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
-                                                MVT &LocVT,
-                                                CCValAssign::LocInfo &LocInfo,
-                                                ISD::ArgFlagsTy &ArgFlags,
-                                                CCState &State);
-
  static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
  cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
  
@@ -74,8 +59,6 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
  PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
    const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
-  PPCRegInfo = TM.getRegisterInfo();
-  PPCII = TM.getInstrInfo();
  
    setPow2DivIsCheap();
  
@@ -540,6 +523,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
    setTargetDAGCombine(ISD::STORE);
    setTargetDAGCombine(ISD::BR_CC);
    setTargetDAGCombine(ISD::BSWAP);
+  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  
    // Use reciprocal estimates.
    if (TM.Options.UnsafeFPMath) {
@@ -642,7 +626,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
    case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
    case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
    case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
-  case PPCISD::MFCR:            return "PPCISD::MFCR";
+  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
    case PPCISD::VCMP:            return "PPCISD::VCMP";
    case PPCISD::VCMPo:           return "PPCISD::VCMPo";
    case PPCISD::LBRX:            return "PPCISD::LBRX";
@@ -1056,7 +1040,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                              SelectionDAG &DAG,
                                              bool Aligned) const {
    // FIXME dl should come from parent load or store, not from address
-  DebugLoc dl = N.getDebugLoc();
+  SDLoc dl(N);
    // If this can be more profitably realized as r+r, fail.
    if (SelectAddressRegReg(N, Disp, Base, DAG))
      return false;
@@ -1166,7 +1150,6 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
    return true;
  }
  
-
  /// getPreIndexedAddressParts - returns true by value, base pointer and
  /// offset pointer and addressing mode by reference if the node's address
  /// can be legally represented as pre-indexed load / store address.
@@ -1253,8 +1236,8 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
  /// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
  static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
                                 unsigned &LoOpFlags, const GlobalValue *GV = 0) {
-  HiOpFlags = PPCII::MO_HA16;
-  LoOpFlags = PPCII::MO_LO16;
+  HiOpFlags = PPCII::MO_HA;
+  LoOpFlags = PPCII::MO_LO;
  
    // Don't use the pic base if not in PIC relocation model.  Or if we are on a
    // non-darwin platform.  We don't support PIC on other platforms yet.
@@ -1284,7 +1267,7 @@ static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
                               SelectionDAG &DAG) {
    EVT PtrVT = HiPart.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
-  DebugLoc DL = HiPart.getDebugLoc();
+  SDLoc DL(HiPart);
  
    SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
    SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
@@ -1309,7 +1292,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
    // The actual address of the GlobalValue is stored in the TOC.
    if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
      SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
-    return DAG.getNode(PPCISD::TOC_ENTRY, CP->getDebugLoc(), MVT::i64, GA,
+    return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
                         DAG.getRegister(PPC::X2, MVT::i64));
    }
  
@@ -1330,7 +1313,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
    // The actual address of the GlobalValue is stored in the TOC.
    if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
      SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
-    return DAG.getNode(PPCISD::TOC_ENTRY, JT->getDebugLoc(), MVT::i64, GA,
+    return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
                         DAG.getRegister(PPC::X2, MVT::i64));
    }
  
@@ -1358,7 +1341,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  
    GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
-  DebugLoc dl = GA->getDebugLoc();
+  SDLoc dl(GA);
    const GlobalValue *GV = GA->getGlobal();
    EVT PtrVT = getPointerTy();
    bool is64bit = PPCSubTarget.isPPC64();
@@ -1367,9 +1350,9 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
  
    if (Model == TLSModel::LocalExec) {
      SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
-                                               PPCII::MO_TPREL16_HA);
+                                               PPCII::MO_TPREL_HA);
      SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
-                                               PPCII::MO_TPREL16_LO);
+                                               PPCII::MO_TPREL_LO);
      SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
                                       is64bit ? MVT::i64 : MVT::i32);
      SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
@@ -1444,7 +1427,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
    EVT PtrVT = Op.getValueType();
    GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
-  DebugLoc DL = GSDN->getDebugLoc();
+  SDLoc DL(GSDN);
    const GlobalValue *GV = GSDN->getGlobal();
  
    // 64-bit SVR4 ABI code is always position-independent.
@@ -1475,7 +1458,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
  
  SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
  
    // If we're comparing for equality to zero, expose the fact that this is
    // implented as a ctlz/srl pair on ppc, so that the dag combiner can
@@ -1524,7 +1507,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
    SDValue InChain = Node->getOperand(0);
    SDValue VAListPtr = Node->getOperand(1);
    const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
-  DebugLoc dl = Node->getDebugLoc();
+  SDLoc dl(Node);
  
    assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
  
@@ -1635,7 +1618,7 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
    SDValue Trmp = Op.getOperand(1); // trampoline
    SDValue FPtr = Op.getOperand(2); // nested function
    SDValue Nest = Op.getOperand(3); // 'nest' parameter value
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
  
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    bool isPPC64 = (PtrVT == MVT::i64);
@@ -1677,7 +1660,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
    MachineFunction &MF = DAG.getMachineFunction();
    PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
  
    if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
      // vastart just stores the address of the VarArgsFrameIndex slot into the
@@ -1771,18 +1754,18 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
  
  #include "PPCGenCallingConv.inc"
  
-static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
-                                       CCValAssign::LocInfo &LocInfo,
-                                       ISD::ArgFlagsTy &ArgFlags,
-                                       CCState &State) {
+bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                                      CCValAssign::LocInfo &LocInfo,
+                                      ISD::ArgFlagsTy &ArgFlags,
+                                      CCState &State) {
    return true;
  }
  
-static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
-                                              MVT &LocVT,
-                                              CCValAssign::LocInfo &LocInfo,
-                                              ISD::ArgFlagsTy &ArgFlags,
-                                              CCState &State) {
+bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+                                             MVT &LocVT,
+                                             CCValAssign::LocInfo &LocInfo,
+                                             ISD::ArgFlagsTy &ArgFlags,
+                                             CCState &State) {
    static const uint16_t ArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
@@ -1805,11 +1788,11 @@ static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
    return false;
  }
  
-static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
-                                                MVT &LocVT,
-                                                CCValAssign::LocInfo &LocInfo,
-                                                ISD::ArgFlagsTy &ArgFlags,
-                                                CCState &State) {
+bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+                                               MVT &LocVT,
+                                               CCValAssign::LocInfo &LocInfo,
+                                               ISD::ArgFlagsTy &ArgFlags,
+                                               CCState &State) {
    static const uint16_t ArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
@@ -1860,7 +1843,7 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain,
                                          CallingConv::ID CallConv, bool isVarArg,
                                          const SmallVectorImpl<ISD::InputArg>
                                            &Ins,
-                                        DebugLoc dl, SelectionDAG &DAG,
+                                        SDLoc dl, SelectionDAG &DAG,
                                          SmallVectorImpl<SDValue> &InVals)
                                            const {
    if (PPCSubTarget.isSVR4ABI()) {
@@ -1882,7 +1865,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
-                                      DebugLoc dl, SelectionDAG &DAG,
+                                      SDLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals) const {
  
    // 32-bit SVR4 ABI Stack Frame Layout:
@@ -2099,7 +2082,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
  SDValue
  PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
                                       SelectionDAG &DAG, SDValue ArgVal,
-                                     DebugLoc dl) const {
+                                     SDLoc dl) const {
    if (Flags.isSExt())
      ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
                           DAG.getValueType(ObjectVT));
@@ -2142,7 +2125,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
-                                      DebugLoc dl, SelectionDAG &DAG,
+                                      SDLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals) const {
    // TODO: add description of PPC stack frame format, or at least some docs.
    //
@@ -2431,7 +2414,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
-                                      DebugLoc dl, SelectionDAG &DAG,
+                                      SDLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals) const {
    // TODO: add description of PPC stack frame format, or at least some docs.
    //
@@ -2933,7 +2916,7 @@ StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
                                             SDValue Chain,
                     const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
                     SmallVector<SDValue, 8> &MemOpChains,
-                   DebugLoc dl) {
+                   SDLoc dl) {
    for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
      SDValue Arg = TailCallArgs[i].Arg;
      SDValue FIN = TailCallArgs[i].FrameIdxOp;
@@ -2955,7 +2938,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
                                                 int SPDiff,
                                                 bool isPPC64,
                                                 bool isDarwinABI,
-                                               DebugLoc dl) {
+                                               SDLoc dl) {
    if (SPDiff) {
      // Calculate the new stack slot for the return address.
      int SlotSize = isPPC64 ? 8 : 4;
@@ -3012,7 +2995,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
                                                          SDValue &LROpOut,
                                                          SDValue &FPOpOut,
                                                          bool isDarwinABI,
-                                                        DebugLoc dl) const {
+                                                        SDLoc dl) const {
    if (SPDiff) {
      // Load the LR and FP stack slot for later adjusting.
      EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
@@ -3042,7 +3025,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
  static SDValue
  CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                            ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
-                          DebugLoc dl) {
+                          SDLoc dl) {
    SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
    return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                         false, false, MachinePointerInfo(0),
@@ -3057,7 +3040,7 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
                   unsigned ArgOffset, bool isPPC64, bool isTailCall,
                   bool isVector, SmallVector<SDValue, 8> &MemOpChains,
                   SmallVector<TailCallArgumentInfo, 8> &TailCallArguments,
-                 DebugLoc dl) {
+                 SDLoc dl) {
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    if (!isTailCall) {
      if (isVector) {
@@ -3078,7 +3061,7 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
  
  static
  void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
-                     DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
+                     SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
                       SDValue LROp, SDValue FPOp, bool isDarwinABI,
                       SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) {
    MachineFunction &MF = DAG.getMachineFunction();
@@ -3100,13 +3083,13 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
  
    // Emit callseq_end just before tailcall node.
    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
-                             DAG.getIntPtrConstant(0, true), InFlag);
+                             DAG.getIntPtrConstant(0, true), InFlag, dl);
    InFlag = Chain.getValue(1);
  }
  
  static
  unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
-                     SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall,
+                     SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
                       SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
                       SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys,
                       const PPCSubtarget &PPCSubTarget) {
@@ -3292,7 +3275,7 @@ SDValue
  PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                     CallingConv::ID CallConv, bool isVarArg,
                                     const SmallVectorImpl<ISD::InputArg> &Ins,
-                                   DebugLoc dl, SelectionDAG &DAG,
+                                   SDLoc dl, SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &InVals) const {
  
    SmallVector<CCValAssign, 16> RVLocs;
@@ -3335,7 +3318,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
  }
  
  SDValue
-PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
+PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
                                bool isTailCall, bool isVarArg,
                                SelectionDAG &DAG,
                                SmallVector<std::pair<unsigned, SDValue>, 8>
@@ -3422,7 +3405,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
  
    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                               DAG.getIntPtrConstant(BytesCalleePops, true),
-                             InFlag);
+                             InFlag, dl);
    if (!Ins.empty())
      InFlag = Chain.getValue(1);
  
@@ -3434,7 +3417,7 @@ SDValue
  PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                               SmallVectorImpl<SDValue> &InVals) const {
    SelectionDAG &DAG                     = CLI.DAG;
-  DebugLoc &dl                          = CLI.DL;
+  SDLoc &dl                          = CLI.DL;
    SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
    SmallVector<SDValue, 32> &OutVals     = CLI.OutVals;
    SmallVector<ISD::InputArg, 32> &Ins   = CLI.Ins;
@@ -3471,7 +3454,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                                      const SmallVectorImpl<SDValue> &OutVals,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
-                                    DebugLoc dl, SelectionDAG &DAG,
+                                    SDLoc dl, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
    // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
    // of the 32-bit SVR4 ABI stack frame layout.
@@ -3557,7 +3540,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
  
    // Adjust the stack pointer for the new arguments...
    // These operations are automatically eliminated by the prolog/epilog pass
-  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                               dl);
    SDValue CallSeqStart = Chain;
  
    // Load the return address and frame pointer so it can be moved somewhere else
@@ -3608,7 +3592,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
  
        // This must go outside the CALLSEQ_START..END.
        SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
-                           CallSeqStart.getNode()->getOperand(1));
+                           CallSeqStart.getNode()->getOperand(1),
+                           SDLoc(MemcpyCall));
        DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                               NewCallSeqStart.getNode());
        Chain = CallSeqStart = NewCallSeqStart;
@@ -3684,13 +3669,14 @@ PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
                                                SDValue CallSeqStart,
                                                ISD::ArgFlagsTy Flags,
                                                SelectionDAG &DAG,
-                                              DebugLoc dl) const {
+                                              SDLoc dl) const {
    SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
                          CallSeqStart.getNode()->getOperand(0),
                          Flags, DAG, dl);
    // The MEMCPY must go outside the CALLSEQ_START..END.
    SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
-                             CallSeqStart.getNode()->getOperand(1));
+                             CallSeqStart.getNode()->getOperand(1),
+                             SDLoc(MemcpyCall));
    DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                           NewCallSeqStart.getNode());
    return NewCallSeqStart;
@@ -3703,7 +3689,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                                      const SmallVectorImpl<SDValue> &OutVals,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
-                                    DebugLoc dl, SelectionDAG &DAG,
+                                    SDLoc dl, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  
    unsigned NumOps = Outs.size();
@@ -3744,7 +3730,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
  
    // Adjust the stack pointer for the new arguments...
    // These operations are automatically eliminated by the prolog/epilog pass
-  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                               dl);
    SDValue CallSeqStart = Chain;
  
    // Load the return address and frame pointer so it can be move somewhere else
@@ -4074,7 +4061,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                                      const SmallVectorImpl<SDValue> &OutVals,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
-                                    DebugLoc dl, SelectionDAG &DAG,
+                                    SDLoc dl, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  
    unsigned NumOps = Outs.size();
@@ -4115,7 +4102,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
  
    // Adjust the stack pointer for the new arguments...
    // These operations are automatically eliminated by the prolog/epilog pass
-  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                               dl);
    SDValue CallSeqStart = Chain;
  
    // Load the return address and frame pointer so it can be move somewhere else
@@ -4431,7 +4419,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
                                 CallingConv::ID CallConv, bool isVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
-                               DebugLoc dl, SelectionDAG &DAG) const {
+                               SDLoc dl, SelectionDAG &DAG) const {
  
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
@@ -4480,7 +4468,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
  SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
                                     const PPCSubtarget &Subtarget) const {
    // When we pop the dynamic allocation we need to restore the SP link.
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
  
    // Get the corect type for pointers.
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -4565,7 +4553,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
    // Get the inputs.
    SDValue Chain = Op.getOperand(0);
    SDValue Size  = Op.getOperand(1);
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
  
    // Get the corect type for pointers.
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -4582,7 +4570,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
  
  SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                                                 SelectionDAG &DAG) const {
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
    return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
                       DAG.getVTList(MVT::i32, MVT::Other),
                       Op.getOperand(0), Op.getOperand(1));
@@ -4590,7 +4578,7 @@ SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
  
  SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                                  SelectionDAG &DAG) const {
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
    return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
                       Op.getOperand(0), Op.getOperand(1));
  }
@@ -4616,7 +4604,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
    EVT CmpVT = Op.getOperand(0).getValueType();
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
  
    // If the RHS of the comparison is a 0.0, we don't need to do the
    // subtraction at all.
@@ -4697,7 +4685,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  
  // FIXME: Split this code up when LegalizeDAGTypes lands.
  SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
-                                           DebugLoc dl) const {
+                                           SDLoc dl) const {
    assert(Op.getOperand(0).getValueType().isFloatingPoint());
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType() == MVT::f32)
@@ -4756,7 +4744,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
  
  SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                             SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    // Don't handle ppc_fp128 here; let it be lowered to a libcall.
    if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
      return SDValue();
@@ -4890,7 +4878,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
  
  SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                              SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    /*
     The rounding mode is in bits 30:31 of FPSR, and has the following
     settings:
@@ -4956,7 +4944,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
  SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
    EVT VT = Op.getValueType();
    unsigned BitWidth = VT.getSizeInBits();
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    assert(Op.getNumOperands() == 3 &&
           VT == Op.getOperand(1).getValueType() &&
           "Unexpected SHL!");
@@ -4984,7 +4972,7 @@ SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  
  SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
    EVT VT = Op.getValueType();
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    unsigned BitWidth = VT.getSizeInBits();
    assert(Op.getNumOperands() == 3 &&
           VT == Op.getOperand(1).getValueType() &&
@@ -5012,7 +5000,7 @@ SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  }
  
  SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    EVT VT = Op.getValueType();
    unsigned BitWidth = VT.getSizeInBits();
    assert(Op.getNumOperands() == 3 &&
@@ -5047,7 +5035,7 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
  /// BuildSplatI - Build a canonical splati of Val with an element size of
  /// SplatSize.  Cast the result to VT.
  static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
-                             SelectionDAG &DAG, DebugLoc dl) {
+                             SelectionDAG &DAG, SDLoc dl) {
    assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
  
    static const EVT VTys[] = { // canonical VT to use for each size.
@@ -5074,7 +5062,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
  /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
  /// specified intrinsic ID.
  static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op,
-                                SelectionDAG &DAG, DebugLoc dl,
+                                SelectionDAG &DAG, SDLoc dl,
                                  EVT DestVT = MVT::Other) {
    if (DestVT == MVT::Other) DestVT = Op.getValueType();
    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
@@ -5084,7 +5072,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op,
  /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
  /// specified intrinsic ID.
  static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
-                                SelectionDAG &DAG, DebugLoc dl,
+                                SelectionDAG &DAG, SDLoc dl,
                                  EVT DestVT = MVT::Other) {
    if (DestVT == MVT::Other) DestVT = LHS.getValueType();
    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
@@ -5095,7 +5083,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
  /// specified intrinsic ID.
  static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
                                  SDValue Op2, SelectionDAG &DAG,
-                                DebugLoc dl, EVT DestVT = MVT::Other) {
+                                SDLoc dl, EVT DestVT = MVT::Other) {
    if (DestVT == MVT::Other) DestVT = Op0.getValueType();
    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                       DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
@@ -5105,7 +5093,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
  /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
  /// amount.  The result has the specified value type.
  static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
-                             EVT VT, SelectionDAG &DAG, DebugLoc dl) {
+                             EVT VT, SelectionDAG &DAG, SDLoc dl) {
    // Force LHS/RHS to be the right type.
    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
@@ -5124,7 +5112,7 @@ static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
  // sequence of ops that should be used.
  SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                               SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
    assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
  
@@ -5280,7 +5268,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
  /// the specified operations to build the shuffle.
  static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                        SDValue RHS, SelectionDAG &DAG,
-                                      DebugLoc dl) {
+                                      SDLoc dl) {
    unsigned OpNum = (PFEntry >> 26) & 0x0F;
    unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
    unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
@@ -5359,7 +5347,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
  /// lowered into a vperm.
  SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                                 SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    SDValue V1 = Op.getOperand(0);
    SDValue V2 = Op.getOperand(1);
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
@@ -5526,7 +5514,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
    // If this is a lowered altivec predicate compare, CompareOpc is set to the
    // opcode number of the comparison.
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    int CompareOpc;
    bool isDot;
    if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
@@ -5551,7 +5539,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
  
    // Now that we have the comparison, emit a copy from the CR to a GPR.
    // This is flagged to the above dot comparison.
-  SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32,
+  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
                                  DAG.getRegister(PPC::CR6, MVT::i32),
                                  CompNode.getValue(1));
  
@@ -5590,7 +5578,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
  
  SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                     SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    // Create a stack slot that is 16-byte aligned.
    MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
    int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
@@ -5607,7 +5595,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
  }
  
  SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    if (Op.getValueType() == MVT::v4i32) {
      SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  
@@ -5694,7 +5682,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
    case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
    case ISD::FP_TO_UINT:
    case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
-                                                       Op.getDebugLoc());
+                                                       SDLoc(Op));
    case ISD::UINT_TO_FP:
    case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
    case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
@@ -5724,7 +5712,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                             SmallVectorImpl<SDValue>&Results,
                                             SelectionDAG &DAG) const {
    const TargetMachine &TM = getTargetMachine();
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
    switch (N->getOpcode()) {
    default:
      llvm_unreachable("Do not know how to custom type legalize this operation!");
@@ -6067,7 +6055,9 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
  
    // Setup
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
-  MIB.addRegMask(PPCRegInfo->getNoPreservedMask());
+  const PPCRegisterInfo *TRI =
+    static_cast<const PPCRegisterInfo*>(getTargetMachine().getRegisterInfo());
+  MIB.addRegMask(TRI->getNoPreservedMask());
  
    BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
  
@@ -6230,8 +6220,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
      Cond.push_back(MI->getOperand(1));
  
      DebugLoc dl = MI->getDebugLoc();
-    PPCII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), Cond,
-                        MI->getOperand(2).getReg(), MI->getOperand(3).getReg());
+    const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+    TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
+                      Cond, MI->getOperand(2).getReg(),
+                      MI->getOperand(3).getReg());
    } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
               MI->getOpcode() == PPC::SELECT_CC_I8 ||
               MI->getOpcode() == PPC::SELECT_CC_F4 ||
@@ -6675,7 +6667,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
        ++Iterations;
  
      SelectionDAG &DAG = DCI.DAG;
-    DebugLoc dl = Op.getDebugLoc();
+    SDLoc dl(Op);
  
      SDValue FPOne =
        DAG.getConstantFP(1.0, VT.getScalarType());
@@ -6737,7 +6729,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
        ++Iterations;
  
      SelectionDAG &DAG = DCI.DAG;
-    DebugLoc dl = Op.getDebugLoc();
+    SDLoc dl(Op);
  
      SDValue FPThreeHalves =
        DAG.getConstantFP(1.5, VT.getScalarType());
@@ -6781,11 +6773,120 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
    return SDValue();
  }
  
+// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
+// not enforce equality of the chain operands.
+static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
+                            unsigned Bytes, int Dist,
+                            SelectionDAG &DAG) {
+  EVT VT = LS->getMemoryVT();
+  if (VT.getSizeInBits() / 8 != Bytes)
+    return false;
+
+  SDValue Loc = LS->getBasePtr();
+  SDValue BaseLoc = Base->getBasePtr();
+  if (Loc.getOpcode() == ISD::FrameIndex) {
+    if (BaseLoc.getOpcode() != ISD::FrameIndex)
+      return false;
+    const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
+    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+    int FS  = MFI->getObjectSize(FI);
+    int BFS = MFI->getObjectSize(BFI);
+    if (FS != BFS || FS != (int)Bytes) return false;
+    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+  }
+
+  // Handle X+C
+  if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
+      cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
+    return true;
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  const GlobalValue *GV1 = NULL;
+  const GlobalValue *GV2 = NULL;
+  int64_t Offset1 = 0;
+  int64_t Offset2 = 0;
+  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+  if (isGA1 && isGA2 && GV1 == GV2)
+    return Offset1 == (Offset2 + Dist*Bytes);
+  return false;
+}
+
+// Return true is there is a nearyby consecutive load to the one provided
+// (regardless of alignment). We search up and down the chain, looking though
+// token factors and other loads (but nothing else). As a result, a true
+// results indicates that it is safe to create a new consecutive load adjacent
+// to the load provided.
+static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
+  SDValue Chain = LD->getChain();
+  EVT VT = LD->getMemoryVT();
+
+  SmallSet<SDNode *, 16> LoadRoots;
+  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
+  SmallSet<SDNode *, 16> Visited;
+
+  // First, search up the chain, branching to follow all token-factor operands.
+  // If we find a consecutive load, then we're done, otherwise, record all
+  // nodes just above the top-level loads and token factors.
+  while (!Queue.empty()) {
+    SDNode *ChainNext = Queue.pop_back_val();
+    if (!Visited.insert(ChainNext))
+      continue;
+
+    if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) {
+      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
+        return true;
+
+      if (!Visited.count(ChainLD->getChain().getNode()))
+        Queue.push_back(ChainLD->getChain().getNode());
+    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
+      for (SDNode::op_iterator O = ChainNext->op_begin(),
+           OE = ChainNext->op_end(); O != OE; ++O)
+        if (!Visited.count(O->getNode()))
+          Queue.push_back(O->getNode());
+    } else
+      LoadRoots.insert(ChainNext);
+  }
+
+  // Second, search down the chain, starting from the top-level nodes recorded
+  // in the first phase. These top-level nodes are the nodes just above all
+  // loads and token factors. Starting with their uses, recursively look though
+  // all loads (just the chain uses) and token factors to find a consecutive
+  // load.
+  Visited.clear();
+  Queue.clear();
+
+  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
+       IE = LoadRoots.end(); I != IE; ++I) {
+    Queue.push_back(*I);
+       
+    while (!Queue.empty()) {
+      SDNode *LoadRoot = Queue.pop_back_val();
+      if (!Visited.insert(LoadRoot))
+        continue;
+
+      if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot))
+        if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
+          return true;
+
+      for (SDNode::use_iterator UI = LoadRoot->use_begin(),
+           UE = LoadRoot->use_end(); UI != UE; ++UI)
+        if (((isa<LoadSDNode>(*UI) &&
+            cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
+            UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
+          Queue.push_back(*UI);
+    }
+  }
+
+  return false;
+}
+
  SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
    const TargetMachine &TM = getTargetMachine();
    SelectionDAG &DAG = DCI.DAG;
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
    switch (N->getOpcode()) {
    default: break;
    case PPCISD::SHL:
@@ -6826,7 +6927,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                   DCI);
        if (RV.getNode() != 0) {
          DCI.AddToWorklist(RV.getNode());
-        RV = DAG.getNode(ISD::FP_EXTEND, N->getOperand(1).getDebugLoc(),
+        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)),
                           N->getValueType(0), RV);
          DCI.AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
@@ -6839,7 +6940,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                   DCI);
        if (RV.getNode() != 0) {
          DCI.AddToWorklist(RV.getNode());
-        RV = DAG.getNode(ISD::FP_ROUND, N->getOperand(1).getDebugLoc(),
+        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)),
                           N->getValueType(0), RV,
                           N->getOperand(1).getOperand(1));
          DCI.AddToWorklist(RV.getNode());
@@ -6989,8 +7090,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
        // cause the last vector in the sequence to be (re)loaded. Otherwise,
        // the next vector will be fetched as you might suspect was necessary.
  
-      // FIXME: We might be able to reuse the permutation generation from
+      // We might be able to reuse the permutation generation from
        // a different base address offset from this one by an aligned amount.
+      // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
+      // optimization later.
        SDValue PermCntl = BuildIntrinsicOp(Intrinsic::ppc_altivec_lvsl, Ptr,
                                            DAG, dl, MVT::v16i8);
  
@@ -7013,12 +7116,19 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
        // require the next load to appear to be aligned, even though it
        // is actually offset from the base pointer by a lesser amount.
        int IncOffset = VT.getSizeInBits() / 8;
-      int IncValue = IncOffset - 1;
+      int IncValue = IncOffset;
+
+      // Walk (both up and down) the chain looking for another load at the real
+      // (aligned) offset (the alignment of the other load does not matter in
+      // this case). If found, then do not use the offset reduction trick, as
+      // that will prevent the loads from being later combined (as they would
+      // otherwise be duplicates).
+      if (!findConsecutiveLoad(LD, DAG))
+        --IncValue;
+
        SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
        Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
  
-      // FIXME: We might have another load (with a slightly-different
-      // real offset) that we can reuse here.
        SDValue ExtraLoad =
          DAG.getLoad(VT, dl, Chain, Ptr,
                      LD->getPointerInfo().getWithOffset(IncOffset),
@@ -7075,6 +7185,30 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
      }
      }
      break;
+  case ISD::INTRINSIC_WO_CHAIN:
+    if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() ==
+          Intrinsic::ppc_altivec_lvsl &&
+        N->getOperand(1)->getOpcode() == ISD::ADD) {
+      SDValue Add = N->getOperand(1);
+
+      if (DAG.MaskedValueIsZero(Add->getOperand(1),
+            APInt::getAllOnesValue(4 /* 16 byte alignment */).zext(
+              Add.getValueType().getScalarType().getSizeInBits()))) {
+        SDNode *BasePtr = Add->getOperand(0).getNode();
+        for (SDNode::use_iterator UI = BasePtr->use_begin(),
+             UE = BasePtr->use_end(); UI != UE; ++UI) {
+          if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+              cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
+                Intrinsic::ppc_altivec_lvsl) {
+            // We've found another LVSL, and this address if an aligned
+            // multiple of that one. The results will be the same, so use the
+            // one we've just found instead.
+
+            return SDValue(*UI, 0);
+          }
+        }
+      }
+    }
    case ISD::BSWAP:
      // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
      if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
@@ -7159,16 +7293,16 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
          }
        }
  
-      // If the user is a MFCR instruction, we know this is safe.  Otherwise we
-      // give up for right now.
-      if (FlagUser->getOpcode() == PPCISD::MFCR)
+      // If the user is a MFOCRF instruction, we know this is safe.
+      // Otherwise we give up for right now.
+      if (FlagUser->getOpcode() == PPCISD::MFOCRF)
          return SDValue(VCMPoNode, 0);
      }
      break;
    }
    case ISD::BR_CC: {
      // If this is a branch on an altivec predicate comparison, lower this so
-    // that we don't have to do a MFCR: instead, branch directly on CR6.  This
+    // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
      // lowering is done pre-legalize, because the legalizer lowers the predicate
      // compare down to code that is difficult to reassemble.
      ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
@@ -7380,7 +7514,7 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
  
  std::pair<unsigned, const TargetRegisterClass*>
  PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
-                                                EVT VT) const {
+                                                MVT VT) const {
    if (Constraint.size() == 1) {
      // GCC RS6000 Constraint Letters
      switch (Constraint[0]) {
@@ -7521,7 +7655,7 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
    MachineFrameInfo *MFI = MF.getFrameInfo();
    MFI->setReturnAddressIsTaken(true);
  
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  
    // Make sure the function does not optimize away the store of the RA to
@@ -7551,7 +7685,7 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
  
  SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                            SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();