Make the indirect branch optimization deterministic. No functionality change.

[oota-llvm.git] / lib / CodeGen / SelectionDAG / SelectionDAGBuilder.cpp
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

index ec5e08c37ffdd564a4199013b4d5d098609cbf19..85103d5750a096ff3b02f81844f891debccbaaa0 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -19,8 +19,10 @@
  #include "llvm/ADT/SmallSet.h"
  #include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ValueTracking.h"
  #include "llvm/Constants.h"
  #include "llvm/CallingConv.h"
+#include "llvm/DebugInfo.h"
  #include "llvm/DerivedTypes.h"
  #include "llvm/Function.h"
  #include "llvm/GlobalVariable.h"
@@ -42,8 +44,7 @@
  #include "llvm/CodeGen/MachineModuleInfo.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Target/TargetData.h"
+#include "llvm/DataLayout.h"
  #include "llvm/Target/TargetFrameLowering.h"
  #include "llvm/Target/TargetInstrInfo.h"
  #include "llvm/Target/TargetIntrinsicInfo.h"
@@ -51,6 +52,7 @@
  #include "llvm/Target/TargetLowering.h"
  #include "llvm/Target/TargetOptions.h"
  #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/IntegersSubsetMapping.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/ErrorHandling.h"
  #include "llvm/Support/MathExtras.h"
@@ -87,7 +89,7 @@ static const unsigned MaxParallelChains = 64;
  
  static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
                                        const SDValue *Parts, unsigned NumParts,
-                                      EVT PartVT, EVT ValueVT);
+                                      EVT PartVT, EVT ValueVT, const Value *V);
  
  /// getCopyFromParts - Create a value that contains the specified legal parts
  /// combined into the value they represent.  If the parts combine to a type
@@ -97,9 +99,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
  static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
                                  const SDValue *Parts,
                                  unsigned NumParts, EVT PartVT, EVT ValueVT,
+                                const Value *V,
                                  ISD::NodeType AssertOp = ISD::DELETED_NODE) {
    if (ValueVT.isVector())
-    return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT);
+    return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
+                                  PartVT, ValueVT, V);
  
    assert(NumParts > 0 && "No parts to assemble!");
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -123,9 +127,9 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
  
        if (RoundParts > 2) {
          Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
-                              PartVT, HalfVT);
+                              PartVT, HalfVT, V);
          Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
-                              RoundParts / 2, PartVT, HalfVT);
+                              RoundParts / 2, PartVT, HalfVT, V);
        } else {
          Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
          Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
@@ -141,7 +145,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
          unsigned OddParts = NumParts - RoundParts;
          EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
          Hi = getCopyFromParts(DAG, DL,
-                              Parts + RoundParts, OddParts, PartVT, OddVT);
+                              Parts + RoundParts, OddParts, PartVT, OddVT, V);
  
          // Combine the round and odd parts.
          Lo = Val;
@@ -170,7 +174,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
        assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
               !PartVT.isVector() && "Unexpected split");
        EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
-      Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT);
+      Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V);
      }
    }
  
@@ -208,14 +212,14 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
    llvm_unreachable("Unknown mismatch!");
  }
  
-/// getCopyFromParts - Create a value that contains the specified legal parts
-/// combined into the value they represent.  If the parts combine to a type
-/// larger then ValueVT then AssertOp can be used to specify whether the extra
-/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
-/// (ISD::AssertSext).
+/// getCopyFromPartsVector - Create a value that contains the specified legal
+/// parts combined into the value they represent.  If the parts combine to a
+/// type larger then ValueVT then AssertOp can be used to specify whether the
+/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
+/// ValueVT (ISD::AssertSext).
  static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
                                        const SDValue *Parts, unsigned NumParts,
-                                      EVT PartVT, EVT ValueVT) {
+                                      EVT PartVT, EVT ValueVT, const Value *V) {
    assert(ValueVT.isVector() && "Not a vector value");
    assert(NumParts > 0 && "No parts to assemble!");
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -241,7 +245,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
        // as appropriate.
        for (unsigned i = 0; i != NumParts; ++i)
          Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
-                                  PartVT, IntermediateVT);
+                                  PartVT, IntermediateVT, V);
      } else if (NumParts > 0) {
        // If the intermediate type was expanded, build the intermediate
        // operands from the parts.
@@ -250,7 +254,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
        unsigned Factor = NumParts / NumIntermediates;
        for (unsigned i = 0; i != NumIntermediates; ++i)
          Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
-                                  PartVT, IntermediateVT);
+                                  PartVT, IntermediateVT, V);
      }
  
      // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
@@ -298,8 +302,19 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
  
    // Handle cases such as i8 -> <1 x i1>
-  assert(ValueVT.getVectorNumElements() == 1 &&
-         "Only trivial scalar-to-vector conversions should get here!");
+  if (ValueVT.getVectorNumElements() != 1) {
+    LLVMContext &Ctx = *DAG.getContext();
+    Twine ErrMsg("non-trivial scalar-to-vector conversion");
+    if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) {
+      if (const CallInst *CI = dyn_cast<CallInst>(I))
+        if (isa<InlineAsm>(CI->getCalledValue()))
+          ErrMsg = ErrMsg + ", possible invalid constraint for vector type";
+      Ctx.emitError(I, ErrMsg);
+    } else {
+      Ctx.emitError(ErrMsg);
+    }
+    report_fatal_error("Cannot handle scalar-to-vector conversion!");
+  }
  
    if (ValueVT.getVectorNumElements() == 1 &&
        ValueVT.getVectorElementType() != PartVT) {
@@ -311,25 +326,22 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
    return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
  }
  
-
-
-
  static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl,
                                   SDValue Val, SDValue *Parts, unsigned NumParts,
-                                 EVT PartVT);
+                                 EVT PartVT, const Value *V);
  
  /// getCopyToParts - Create a series of nodes that contain the specified value
  /// split into legal parts.  If the parts contain more bits than Val, then, for
  /// integers, ExtendKind can be used to specify how to generate the extra bits.
  static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
                             SDValue Val, SDValue *Parts, unsigned NumParts,
-                           EVT PartVT,
+                           EVT PartVT, const Value *V,
                             ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
    EVT ValueVT = Val.getValueType();
  
    // Handle the vector case separately.
    if (ValueVT.isVector())
-    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT);
+    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V);
  
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    unsigned PartBits = PartVT.getSizeInBits();
@@ -381,7 +393,19 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
           "Failed to tile the value with PartVT!");
  
    if (NumParts == 1) {
-    assert(PartVT == ValueVT && "Type conversion failed!");
+    if (PartVT != ValueVT) {
+      LLVMContext &Ctx = *DAG.getContext();
+      Twine ErrMsg("scalar-to-vector conversion failed");
+      if (const Instruction *I = dyn_cast_or_null<Instruction>(V)) {
+        if (const CallInst *CI = dyn_cast<CallInst>(I))
+          if (isa<InlineAsm>(CI->getCalledValue()))
+            ErrMsg = ErrMsg + ", possible invalid constraint for vector type";
+        Ctx.emitError(I, ErrMsg);
+      } else {
+        Ctx.emitError(ErrMsg);
+      }
+    }
+
      Parts[0] = Val;
      return;
    }
@@ -396,7 +420,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
      unsigned OddParts = NumParts - RoundParts;
      SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
                                   DAG.getIntPtrConstant(RoundBits));
-    getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT);
+    getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V);
  
      if (TLI.isBigEndian())
        // The odd parts were reversed by getCopyToParts - unreverse them.
@@ -442,7 +466,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
  /// value split into legal parts.
  static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
                                   SDValue Val, SDValue *Parts, unsigned NumParts,
-                                 EVT PartVT) {
+                                 EVT PartVT, const Value *V) {
    EVT ValueVT = Val.getValueType();
    assert(ValueVT.isVector() && "Not a vector");
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -528,7 +552,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
      // If the register was not expanded, promote or copy the value,
      // as appropriate.
      for (unsigned i = 0; i != NumParts; ++i)
-      getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT);
+      getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V);
    } else if (NumParts > 0) {
      // If the intermediate type was expanded, split each the value into
      // legal parts.
@@ -536,13 +560,10 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
             "Must expand into a divisible number of parts!");
      unsigned Factor = NumParts / NumIntermediates;
      for (unsigned i = 0; i != NumIntermediates; ++i)
-      getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT);
+      getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V);
    }
  }
  
-
-
-
  namespace {
    /// RegsForValue - This struct represents the registers (physical or virtual)
    /// that a particular set of values is assigned, and the type information
@@ -620,14 +641,15 @@ namespace {
      /// If the Flag pointer is NULL, no flag is used.
      SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
                              DebugLoc dl,
-                            SDValue &Chain, SDValue *Flag) const;
+                            SDValue &Chain, SDValue *Flag,
+                            const Value *V = 0) const;
  
      /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
      /// specified value into the registers specified by this object.  This uses
      /// Chain/Flag as the input and updates them for the output Chain/Flag.
      /// If the Flag pointer is NULL, no flag is used.
      void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
-                       SDValue &Chain, SDValue *Flag) const;
+                       SDValue &Chain, SDValue *Flag, const Value *V) const;
  
      /// AddInlineAsmOperands - Add this value to the specified inlineasm node
      /// operand list.  This adds the code marker, matching input operand index
@@ -646,7 +668,8 @@ namespace {
  SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
                                        FunctionLoweringInfo &FuncInfo,
                                        DebugLoc dl,
-                                      SDValue &Chain, SDValue *Flag) const {
+                                      SDValue &Chain, SDValue *Flag,
+                                      const Value *V) const {
    // A Value with type {} or [0 x %t] needs no registers.
    if (ValueVTs.empty())
      return SDValue();
@@ -720,7 +743,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
      }
  
      Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
-                                     NumRegs, RegisterVT, ValueVT);
+                                     NumRegs, RegisterVT, ValueVT, V);
      Part += NumRegs;
      Parts.clear();
    }
@@ -735,7 +758,8 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
  /// Chain/Flag as the input and updates them for the output Chain/Flag.
  /// If the Flag pointer is NULL, no flag is used.
  void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
-                                 SDValue &Chain, SDValue *Flag) const {
+                                 SDValue &Chain, SDValue *Flag,
+                                 const Value *V) const {
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  
    // Get the list of the values's legal parts.
@@ -747,7 +771,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
      EVT RegisterVT = RegVTs[Value];
  
      getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
-                   &Parts[Part], NumParts, RegisterVT);
+                   &Parts[Part], NumParts, RegisterVT, V);
      Part += NumParts;
    }
  
@@ -823,7 +847,8 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
    AA = &aa;
    GFI = gfi;
    LibInfo = li;
-  TD = DAG.getTarget().getTargetData();
+  TD = DAG.getTarget().getDataLayout();
+  Context = DAG.getContext();
    LPadToCallSiteMap.clear();
  }
  
@@ -843,7 +868,7 @@ void SelectionDAGBuilder::clear() {
  }
  
  /// clearDanglingDebugInfo - Clear the dangling debug information
-/// map. This function is seperated from the clear so that debug
+/// map. This function is separated from the clear so that debug
  /// information that is dangling in a basic block can be properly
  /// resolved in a different basic block. This allows the
  /// SelectionDAG to resolve dangling debug information attached
@@ -941,7 +966,7 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
    default: llvm_unreachable("Unknown instruction type encountered!");
      // Build the switch statement using the Instruction.def file.
  #define HANDLE_INST(NUM, OPCODE, CLASS) \
-    case Instruction::OPCODE: visit##OPCODE((CLASS&)I); break;
+    case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
  #include "llvm/Instruction.def"
    }
  
@@ -991,7 +1016,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
      unsigned InReg = It->second;
      RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
      SDValue Chain = DAG.getEntryNode();
-    N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+    N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V);
      resolveDanglingDebugInfo(V, N);
      return N;
    }
@@ -1146,7 +1171,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
      unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
      RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType());
      SDValue Chain = DAG.getEntryNode();
-    return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+    return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V);
    }
  
    llvm_unreachable("Can't get register for value!");
@@ -1202,9 +1227,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
          ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
  
          const Function *F = I.getParent()->getParent();
-        if (F->paramHasAttr(0, Attribute::SExt))
+        if (F->getRetAttributes().hasAttribute(Attributes::SExt))
            ExtendKind = ISD::SIGN_EXTEND;
-        else if (F->paramHasAttr(0, Attribute::ZExt))
+        else if (F->getRetAttributes().hasAttribute(Attributes::ZExt))
            ExtendKind = ISD::ZERO_EXTEND;
  
          if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
@@ -1215,11 +1240,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
          SmallVector<SDValue, 4> Parts(NumParts);
          getCopyToParts(DAG, getCurDebugLoc(),
                         SDValue(RetOp.getNode(), RetOp.getResNo() + j),
-                       &Parts[0], NumParts, PartVT, ExtendKind);
+                       &Parts[0], NumParts, PartVT, &I, ExtendKind);
  
          // 'inreg' on function refers to return value
          ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
-        if (F->paramHasAttr(0, Attribute::InReg))
+        if (F->getRetAttributes().hasAttribute(Attributes::InReg))
            Flags.setInReg();
  
          // Propagate extension type if any
@@ -1578,17 +1603,18 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
      } else
        Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
    } else {
-    assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
+    assert(CB.CC == ISD::SETCC_INVALID &&
+           "Condition is undefined for to-the-range belonging check.");
  
      const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
      const APInt& High  = cast<ConstantInt>(CB.CmpRHS)->getValue();
  
      SDValue CmpOp = getValue(CB.CmpMHS);
      EVT VT = CmpOp.getValueType();
-
-    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+    
+    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(false)) {
        Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
-                          ISD::SETLE);
+                          ISD::SETULE);
      } else {
        SDValue SUB = DAG.getNode(ISD::SUB, dl,
                                  VT, CmpOp, DAG.getConstant(Low, VT));
@@ -1599,7 +1625,10 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
  
    // Update successor info
    addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight);
-  addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
+  // TrueBB and FalseBB are always different unless the incoming IR is
+  // degenerate. This only happens when running llc on weird IR.
+  if (CB.TrueBB != CB.FalseBB)
+    addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
  
    // Set NextBlock to be the MBB immediately after the current one, if any.
    // This is used to avoid emitting unnecessary branches to the next block.
@@ -1760,6 +1789,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
  /// visitBitTestCase - this function produces one "bit test"
  void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
                                             MachineBasicBlock* NextMBB,
+                                           uint32_t BranchWeightToNext,
                                             unsigned Reg,
                                             BitTestCase &B,
                                             MachineBasicBlock *SwitchBB) {
@@ -1797,8 +1827,10 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
                         ISD::SETNE);
    }
  
-  addSuccessorWithWeight(SwitchBB, B.TargetBB);
-  addSuccessorWithWeight(SwitchBB, NextMBB);
+  // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight.
+  addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight);
+  // The branch weight from SwitchBB to NextMBB is BranchWeightToNext.
+  addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext);
  
    SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
                                MVT::Other, getControlRoot(),
@@ -1826,9 +1858,13 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
    MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
  
    const Value *Callee(I.getCalledValue());
+  const Function *Fn = dyn_cast<Function>(Callee);
    if (isa<InlineAsm>(Callee))
      visitInlineAsm(&I);
-  else
+  else if (Fn && Fn->isIntrinsic()) {
+    assert(Fn->getIntrinsicID() == Intrinsic::donothing);
+    // Ignore invokes to @llvm.donothing: jump directly to the next BB.
+  } else
      LowerCallTo(&I, getValue(Callee), false, LandingPad);
  
    // If the value of the invoke is used outside of its defining block, make it
@@ -1901,8 +1937,6 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
                                                   const Value* SV,
                                                   MachineBasicBlock *Default,
                                                   MachineBasicBlock *SwitchBB) {
-  Case& BackCase  = *(CR.Range.second-1);
-
    // Size is the number of Cases represented by this range.
    size_t Size = CR.Range.second - CR.Range.first;
    if (Size > 3)
@@ -1919,6 +1953,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
    if (++BBI != FuncInfo.MF->end())
      NextBlock = BBI;
  
+  BranchProbabilityInfo *BPI = FuncInfo.BPI;
    // If any two of the cases has the same destination, and if one value
    // is the same as the other, but has one bit unset that the other has set,
    // use bit manipulation to do two compares at once.  For example:
@@ -1952,8 +1987,12 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
                                      ISD::SETEQ);
  
          // Update successor info.
-        addSuccessorWithWeight(SwitchBB, Small.BB);
-        addSuccessorWithWeight(SwitchBB, Default);
+        // Both Small and Big will jump to Small.BB, so we sum up the weights.
+        addSuccessorWithWeight(SwitchBB, Small.BB,
+                               Small.ExtraWeight + Big.ExtraWeight);
+        addSuccessorWithWeight(SwitchBB, Default,
+          // The default destination is the first successor in IR.
+          BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0);
  
          // Insert the true branch.
          SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
@@ -1970,11 +2009,27 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
      }
    }
  
+  // Order cases by weight so the most likely case will be checked first.
+  uint32_t UnhandledWeights = 0;
+  if (BPI) {
+    for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) {
+      uint32_t IWeight = I->ExtraWeight;
+      UnhandledWeights += IWeight;
+      for (CaseItr J = CR.Range.first; J < I; ++J) {
+        uint32_t JWeight = J->ExtraWeight;
+        if (IWeight > JWeight)
+          std::swap(*I, *J);
+      }
+    }
+  }
    // Rearrange the case blocks so that the last one falls through if possible.
-  if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
+  Case &BackCase = *(CR.Range.second-1);
+  if (Size > 1 &&
+      NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
      // The last case block won't fall through into 'NextBlock' if we emit the
      // branches in this order.  See if rearranging a case value would help.
-    for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) {
+    // We start at the bottom as it's the case with the least weight.
+    for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I){
        if (I->BB == NextBlock) {
          std::swap(*I, BackCase);
          break;
@@ -2006,14 +2061,16 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
        CC = ISD::SETEQ;
        LHS = SV; RHS = I->High; MHS = NULL;
      } else {
-      CC = ISD::SETLE;
+      CC = ISD::SETCC_INVALID; 
        LHS = I->Low; MHS = SV; RHS = I->High;
      }
  
-    uint32_t ExtraWeight = I->ExtraWeight;
+    // The false weight should be sum of all un-handled cases.
+    UnhandledWeights -= I->ExtraWeight;
      CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough,
                   /* me */ CurBlock,
-                 /* trueweight */ ExtraWeight / 2, /* falseweight */ ExtraWeight / 2);
+                 /* trueweight */ I->ExtraWeight,
+                 /* falseweight */ UnhandledWeights);
  
      // If emitting the first comparison, just call visitSwitchCase to emit the
      // code into the current block.  Otherwise, push the CaseBlock onto the
@@ -2031,14 +2088,14 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
  }
  
  static inline bool areJTsAllowed(const TargetLowering &TLI) {
-  return !TLI.getTargetMachine().Options.DisableJumpTables &&
+  return TLI.supportJumpTables() &&
            (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
             TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
  }
  
  static APInt ComputeRange(const APInt &First, const APInt &Last) {
    uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
-  APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth);
+  APInt LastExt = Last.zext(BitWidth), FirstExt = First.zext(BitWidth);
    return (LastExt - FirstExt + 1ULL);
  }
  
@@ -2058,7 +2115,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
    for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
      TSize += I->size();
  
-  if (!areJTsAllowed(TLI) || TSize.ult(4))
+  if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries()))
      return false;
  
    APInt Range = ComputeRange(First, Last);
@@ -2104,7 +2161,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
      const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
      const APInt &High = cast<ConstantInt>(I->High)->getValue();
  
-    if (Low.sle(TEI) && TEI.sle(High)) {
+    if (Low.ule(TEI) && TEI.ule(High)) {
        DestBBs.push_back(I->BB);
        if (TEI==High)
          ++I;
@@ -2113,13 +2170,28 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
      }
    }
  
+  // Calculate weight for each unique destination in CR.
+  DenseMap<MachineBasicBlock*, uint32_t> DestWeights;
+  if (FuncInfo.BPI)
+    for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+      DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
+          DestWeights.find(I->BB);
+      if (Itr != DestWeights.end()) 
+        Itr->second += I->ExtraWeight;
+      else
+        DestWeights[I->BB] = I->ExtraWeight;
+    }
+
    // Update successor info. Add one edge to each unique successor.
    BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
    for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
           E = DestBBs.end(); I != E; ++I) {
      if (!SuccsHandled[(*I)->getNumber()]) {
        SuccsHandled[(*I)->getNumber()] = true;
-      addSuccessorWithWeight(JumpTableBB, *I);
+      DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
+          DestWeights.find(*I);
+      addSuccessorWithWeight(JumpTableBB, *I,
+                             Itr != DestWeights.end() ? Itr->second : 0);
      }
    }
  
@@ -2261,7 +2333,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
    // Create a CaseBlock record representing a conditional branch to
    // the LHS node if the value being switched on SV is less than C.
    // Otherwise, branch to LHS.
-  CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
+  CaseBlock CB(ISD::SETULT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
  
    if (CR.CaseBB == SwitchBB)
      visitSwitchCase(CB, SwitchBB);
@@ -2333,7 +2405,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
    // Optimize the case where all the case values fit in a
    // word without having to subtract minValue. In this case,
    // we can optimize away the subtraction.
-  if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) {
+  if (maxValue.ult(IntPtrBits)) {
      cmpRange = maxValue;
    } else {
      lowBound = minValue;
@@ -2350,7 +2422,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
  
      if (i == count) {
        assert((count < 3) && "Too much destinations to test!");
-      CasesBits.push_back(CaseBits(0, Dest, 0));
+      CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/));
        count++;
      }
  
@@ -2359,6 +2431,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
  
      uint64_t lo = (lowValue - lowBound).getZExtValue();
      uint64_t hi = (highValue - lowBound).getZExtValue();
+    CasesBits[i].ExtraWeight += I->ExtraWeight;
  
      for (uint64_t j = lo; j <= hi; j++) {
        CasesBits[i].Mask |=  1ULL << j;
@@ -2386,7 +2459,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
      CurMF->insert(BBI, CaseBB);
      BTC.push_back(BitTestCase(CasesBits[i].Mask,
                                CaseBB,
-                              CasesBits[i].BB));
+                              CasesBits[i].BB, CasesBits[i].ExtraWeight));
  
      // Put SV in a virtual register to make it available from the new blocks.
      ExportFromCurrentBlock(SV);
@@ -2407,57 +2480,41 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
  /// Clusterify - Transform simple list of Cases into list of CaseRange's
  size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
                                         const SwitchInst& SI) {
-  size_t numCmps = 0;
+  
+  /// Use a shorter form of declaration, and also
+  /// show the we want to use CRSBuilder as Clusterifier.
+  typedef IntegersSubsetMapping<MachineBasicBlock> Clusterifier;
+  
+  Clusterifier TheClusterifier;
  
    BranchProbabilityInfo *BPI = FuncInfo.BPI;
    // Start with "simple" cases
-  for (size_t i = 0; i < SI.getNumCases(); ++i) {
-    BasicBlock *SuccBB = SI.getCaseSuccessor(i);
+  for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
+       i != e; ++i) {
+    const BasicBlock *SuccBB = i.getCaseSuccessor();
      MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
  
-    uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0;
-
-    Cases.push_back(Case(SI.getCaseValue(i),
-                         SI.getCaseValue(i),
-                         SMBB, ExtraWeight));
-  }
-  std::sort(Cases.begin(), Cases.end(), CaseCmp());
-
-  // Merge case into clusters
-  if (Cases.size() >= 2)
-    // Must recompute end() each iteration because it may be
-    // invalidated by erase if we hold on to it
-    for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin());
-         J != Cases.end(); ) {
-      const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
-      const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
-      MachineBasicBlock* nextBB = J->BB;
-      MachineBasicBlock* currentBB = I->BB;
-
-      // If the two neighboring cases go to the same destination, merge them
-      // into a single case.
-      if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
-        I->High = J->High;
-        J = Cases.erase(J);
-
-        if (BranchProbabilityInfo *BPI = FuncInfo.BPI) {
-          uint32_t CurWeight = currentBB->getBasicBlock() ?
-            BPI->getEdgeWeight(SI.getParent(), currentBB->getBasicBlock()) : 16;
-          uint32_t NextWeight = nextBB->getBasicBlock() ?
-            BPI->getEdgeWeight(SI.getParent(), nextBB->getBasicBlock()) : 16;
-
-          BPI->setEdgeWeight(SI.getParent(), currentBB->getBasicBlock(),
-                             CurWeight + NextWeight);
-        }
-      } else {
-        I = J++;
-      }
-    }
-
-  for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
-    if (I->Low != I->High)
-      // A range counts double, since it requires two compares.
-      ++numCmps;
+    TheClusterifier.add(i.getCaseValueEx(), SMBB, 
+        BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0);
+  }
+  
+  TheClusterifier.optimize();
+  
+  size_t numCmps = 0;
+  for (Clusterifier::RangeIterator i = TheClusterifier.begin(),
+       e = TheClusterifier.end(); i != e; ++i, ++numCmps) {
+    Clusterifier::Cluster &C = *i;
+    // Update edge weight for the cluster.
+    unsigned W = C.first.Weight;
+
+    // FIXME: Currently work with ConstantInt based numbers.
+    // Changing it to APInt based is a pretty heavy for this commit.
+    Cases.push_back(Case(C.first.getLow().toConstantInt(),
+                         C.first.getHigh().toConstantInt(), C.second, W));
+    
+    if (C.first.getLow() != C.first.getHigh())
+    // A range counts double, since it requires two compares.
+    ++numCmps;
    }
  
    return numCmps;
@@ -2530,9 +2587,10 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
      if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB))
        continue;
  
-    // If the switch has more than 5 blocks, and at least 40% dense, and the
+    // If the switch has more than N blocks, and is at least 40% dense, and the
      // target supports indirect branches, then emit a jump table rather than
      // lowering the switch to a binary tree of conditional branches.
+    // N defaults to 4 and is controlled via TLS.getMinimumJumpTableEntries().
      if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
        continue;
  
@@ -2546,14 +2604,14 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
    MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
  
    // Update machine-CFG edges with unique successors.
-  SmallVector<BasicBlock*, 32> succs;
-  succs.reserve(I.getNumSuccessors());
-  for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
-    succs.push_back(I.getSuccessor(i));
-  array_pod_sort(succs.begin(), succs.end());
-  succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
-  for (unsigned i = 0, e = succs.size(); i != e; ++i) {
-    MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]];
+  SmallSet<BasicBlock*, 32> Done;
+  for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
+    BasicBlock *BB = I.getSuccessor(i);
+    bool Inserted = Done.insert(BB);
+    if (!Inserted)
+        continue;
+
+    MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
      addSuccessorWithWeight(IndirectBrMBB, Succ);
    }
  
@@ -2804,11 +2862,11 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) {
  }
  
  // Utility for visitShuffleVector - Return true if every element in Mask,
-// begining // from position Pos and ending in Pos+Size, falls within the
+// beginning from position Pos and ending in Pos+Size, falls within the
  // specified sequential range [L, L+Pos). or is undef.
  static bool isSequentialInRange(const SmallVectorImpl<int> &Mask,
-                                int Pos, int Size, int Low) {
-  for (int i = Pos, e = Pos+Size; i != e; ++i, ++Low)
+                                unsigned Pos, unsigned Size, int Low) {
+  for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
      if (Mask[i] >= 0 && Mask[i] != Low)
        return false;
    return true;
@@ -2878,10 +2936,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
      SmallVector<int, 8> MappedOps;
      for (unsigned i = 0; i != MaskNumElts; ++i) {
        int Idx = Mask[i];
-      if (Idx < (int)SrcNumElts)
-        MappedOps.push_back(Idx);
-      else
-        MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
+      if (Idx >= (int)SrcNumElts)
+        Idx -= SrcNumElts - MaskNumElts;
+      MappedOps.push_back(Idx);
      }
  
      setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
@@ -2893,13 +2950,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
      // Analyze the access pattern of the vector to see if we can extract
      // two subvectors and do the shuffle. The analysis is done by calculating
      // the range of elements the mask access on both vectors.
-    int MinRange[2] = { static_cast<int>(SrcNumElts+1),
-                        static_cast<int>(SrcNumElts+1)};
+    int MinRange[2] = { static_cast<int>(SrcNumElts),
+                        static_cast<int>(SrcNumElts)};
      int MaxRange[2] = {-1, -1};
  
      for (unsigned i = 0; i != MaskNumElts; ++i) {
        int Idx = Mask[i];
-      int Input = 0;
+      unsigned Input = 0;
        if (Idx < 0)
          continue;
  
@@ -2915,35 +2972,31 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
  
      // Check if the access is smaller than the vector size and can we find
      // a reasonable extract index.
-    int RangeUse[2] = { 2, 2 };  // 0 = Unused, 1 = Extract, 2 = Can not
-                                 // Extract.
+    int RangeUse[2] = { -1, -1 };  // 0 = Unused, 1 = Extract, -1 = Can not
+                                   // Extract.
      int StartIdx[2];  // StartIdx to extract from
-    for (int Input=0; Input < 2; ++Input) {
-      if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
+    for (unsigned Input = 0; Input < 2; ++Input) {
+      if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) {
          RangeUse[Input] = 0; // Unused
          StartIdx[Input] = 0;
-      } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) {
-        // Fits within range but we should see if we can find a good
-        // start index that is a multiple of the mask length.
-        if (MaxRange[Input] < (int)MaskNumElts) {
-          RangeUse[Input] = 1; // Extract from beginning of the vector
-          StartIdx[Input] = 0;
-        } else {
-          StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
-          if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
-              StartIdx[Input] + MaskNumElts <= SrcNumElts)
-            RangeUse[Input] = 1; // Extract from a multiple of the mask length.
-        }
+        continue;
        }
+
+      // Find a good start index that is a multiple of the mask length. Then
+      // see if the rest of the elements are in range.
+      StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
+      if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
+          StartIdx[Input] + MaskNumElts <= SrcNumElts)
+        RangeUse[Input] = 1; // Extract from a multiple of the mask length.
      }
  
      if (RangeUse[0] == 0 && RangeUse[1] == 0) {
        setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
        return;
      }
-    else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
+    if (RangeUse[0] >= 0 && RangeUse[1] >= 0) {
        // Extract appropriate subvector and generate a vector shuffle
-      for (int Input=0; Input < 2; ++Input) {
+      for (unsigned Input = 0; Input < 2; ++Input) {
          SDValue &Src = Input == 0 ? Src1 : Src2;
          if (RangeUse[Input] == 0)
            Src = DAG.getUNDEF(VT);
@@ -2956,12 +3009,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
        SmallVector<int, 8> MappedOps;
        for (unsigned i = 0; i != MaskNumElts; ++i) {
          int Idx = Mask[i];
-        if (Idx < 0)
-          MappedOps.push_back(Idx);
-        else if (Idx < (int)SrcNumElts)
-          MappedOps.push_back(Idx - StartIdx[0]);
-        else
-          MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
+        if (Idx >= 0) {
+          if (Idx < (int)SrcNumElts)
+            Idx -= StartIdx[0];
+          else
+            Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
+        }
+        MappedOps.push_back(Idx);
        }
  
        setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
@@ -2977,22 +3031,20 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
    EVT PtrVT = TLI.getPointerTy();
    SmallVector<SDValue,8> Ops;
    for (unsigned i = 0; i != MaskNumElts; ++i) {
-    if (Mask[i] < 0) {
-      Ops.push_back(DAG.getUNDEF(EltVT));
-    } else {
-      int Idx = Mask[i];
-      SDValue Res;
+    int Idx = Mask[i];
+    SDValue Res;
  
-      if (Idx < (int)SrcNumElts)
-        Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
-                          EltVT, Src1, DAG.getConstant(Idx, PtrVT));
-      else
-        Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
-                          EltVT, Src2,
-                          DAG.getConstant(Idx - SrcNumElts, PtrVT));
+    if (Idx < 0) {
+      Res = DAG.getUNDEF(EltVT);
+    } else {
+      SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
+      if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
  
-      Ops.push_back(Res);
+      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+                        EltVT, Src, DAG.getConstant(Idx, PtrVT));
      }
+
+    Ops.push_back(Res);
    }
  
    setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
@@ -3156,9 +3208,9 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
      return;   // getValue will auto-populate this.
  
    Type *Ty = I.getAllocatedType();
-  uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+  uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
    unsigned Align =
-    std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+    std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty),
               I.getAlignment());
  
    SDValue AllocSize = getValue(I.getArraySize());
@@ -3215,6 +3267,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
    bool isInvariant = I.getMetadata("invariant.load") != 0;
    unsigned Alignment = I.getAlignment();
    const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
  
    SmallVector<EVT, 4> ValueVTs;
    SmallVector<uint64_t, 4> Offsets;
@@ -3262,7 +3315,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
                              DAG.getConstant(Offsets[i], PtrVT));
      SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
                              A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
-                            isNonTemporal, isInvariant, Alignment, TBAAInfo);
+                            isNonTemporal, isInvariant, Alignment, TBAAInfo,
+                            Ranges);
  
      Values[i] = L;
      Chains[ChainI] = L.getValue(1);
@@ -3454,7 +3508,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
  
    SDValue InChain = getRoot();
  
-  EVT VT = EVT::getEVT(I.getType());
+  EVT VT = TLI.getValueType(I.getType());
  
    if (I.getAlignment() * 8 < VT.getSizeInBits())
      report_fatal_error("Cannot generate unaligned atomic load");
@@ -3484,7 +3538,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
  
    SDValue InChain = getRoot();
  
-  EVT VT = EVT::getEVT(I.getValueOperand()->getType());
+  EVT VT = TLI.getValueType(I.getValueOperand()->getType());
  
    if (I.getAlignment() * 8 < VT.getSizeInBits())
      report_fatal_error("Cannot generate unaligned atomic store");
@@ -3586,6 +3640,12 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
      }
  
      setValue(&I, Result);
+  } else {
+    // Assign order to result here. If the intrinsic does not produce a result,
+    // it won't be mapped to a SDNode and visit() will not assign it an order
+    // number.
+    ++SDNodeOrder;
+    AssignOrderingToNode(Result.getNode());
    }
  }
  
@@ -3627,17 +3687,6 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) {
    return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
  }
  
-// implVisitAluOverflow - Lower arithmetic overflow instrinsics.
-const char *
-SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) {
-  SDValue Op1 = getValue(I.getArgOperand(0));
-  SDValue Op2 = getValue(I.getArgOperand(1));
-
-  SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
-  setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
-  return 0;
-}
-
  /// visitExp - Lower an exp intrinsic. Handles the special sequences for
  /// limited-precision mode.
  void
@@ -4351,7 +4400,7 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
        return DAG.getConstantFP(1.0, LHS.getValueType());
  
      const Function *F = DAG.getMachineFunction().getFunction();
-    if (!F->hasFnAttr(Attribute::OptimizeForSize) ||
+    if (!F->getFnAttributes().hasAttribute(Attributes::OptimizeForSize) ||
          // If optimizing for size, don't insert too many multiplies.  This
          // inserts up to 5 multiplies.
          CountPopulation_32(Val)+Log2_32(Val) < 7) {
@@ -4397,9 +4446,8 @@ static unsigned getTruncatedArgReg(const SDValue &N) {
      const SDValue &CFR = Ext.getOperand(0);
      if (CFR.getOpcode() == ISD::CopyFromReg)
        return cast<RegisterSDNode>(CFR.getOperand(1))->getReg();
-    else
-      if (CFR.getOpcode() == ISD::TRUNCATE)
-        return getTruncatedArgReg(CFR);
+    if (CFR.getOpcode() == ISD::TRUNCATE)
+      return getTruncatedArgReg(CFR);
    }
    return 0;
  }
@@ -4428,7 +4476,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
    // Some arguments' frame index is recorded during argument lowering.
    Offset = FuncInfo.getArgumentFrameIndex(Arg);
    if (Offset)
-      Reg = TRI->getFrameRegister(MF);
+    Reg = TRI->getFrameRegister(MF);
  
    if (!Reg && N.getNode()) {
      if (N.getOpcode() == ISD::CopyFromReg)
@@ -4503,9 +4551,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                               getValue(I.getArgOperand(0))));
      return 0;
    case Intrinsic::setjmp:
-    return "_setjmp"+!TLI.usesUnderscoreSetJmp();
+    return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];
    case Intrinsic::longjmp:
-    return "_longjmp"+!TLI.usesUnderscoreLongJmp();
+    return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
    case Intrinsic::memcpy: {
      // Assert for address < 256 since we support only user defined address
      // spaces.
@@ -4561,8 +4609,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
      MDNode *Variable = DI.getVariable();
      const Value *Address = DI.getAddress();
-    if (!Address || !DIVariable(Variable).Verify())
+    if (!Address || !DIVariable(Variable).Verify()) {
+      DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
        return 0;
+    }
  
      // Build an entry in DbgOrdering.  Debug info input nodes get an SDNodeOrder
      // but do not always have a corresponding SDNode built.  The SDNodeOrder
@@ -4688,8 +4738,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
        V = BCI->getOperand(0);
      const AllocaInst *AI = dyn_cast<AllocaInst>(V);
      // Don't handle byval struct arguments or VLAs, for example.
-    if (!AI)
+    if (!AI) {
+      DEBUG(dbgs() << "Dropping debug location info for:\n  " << DI << "\n");
+      DEBUG(dbgs() << "  Last seen at:\n    " << *V << "\n");
        return 0;
+    }
      DenseMap<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
      if (SI == FuncInfo.StaticAllocaMap.end())
@@ -4835,7 +4888,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
    }
    case Intrinsic::x86_avx_vinsertf128_pd_256:
    case Intrinsic::x86_avx_vinsertf128_ps_256:
-  case Intrinsic::x86_avx_vinsertf128_si_256: {
+  case Intrinsic::x86_avx_vinsertf128_si_256:
+  case Intrinsic::x86_avx2_vinserti128: {
      DebugLoc dl = getCurDebugLoc();
      EVT DestVT = TLI.getValueType(I.getType());
      EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
@@ -4844,7 +4898,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT,
                        getValue(I.getArgOperand(0)),
                        getValue(I.getArgOperand(1)),
-                      DAG.getConstant(Idx, MVT::i32));
+                      DAG.getIntPtrConstant(Idx));
+    setValue(&I, Res);
+    return 0;
+  }
+  case Intrinsic::x86_avx_vextractf128_pd_256:
+  case Intrinsic::x86_avx_vextractf128_ps_256:
+  case Intrinsic::x86_avx_vextractf128_si_256:
+  case Intrinsic::x86_avx2_vextracti128: {
+    DebugLoc dl = getCurDebugLoc();
+    EVT DestVT = TLI.getValueType(I.getType());
+    uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) *
+                   DestVT.getVectorNumElements();
+    Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
+                      getValue(I.getArgOperand(0)),
+                      DAG.getIntPtrConstant(Idx));
      setValue(&I, Res);
      return 0;
    }
@@ -4859,6 +4927,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
    case Intrinsic::convertuu: {
      ISD::CvtCode Code = ISD::CVT_INVALID;
      switch (Intrinsic) {
+    default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
      case Intrinsic::convertff:  Code = ISD::CVT_FF; break;
      case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
      case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
@@ -4917,6 +4986,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
    case Intrinsic::pow:
      visitPow(I);
      return 0;
+  case Intrinsic::fabs:
+    setValue(&I, DAG.getNode(ISD::FABS, dl,
+                             getValue(I.getArgOperand(0)).getValueType(),
+                             getValue(I.getArgOperand(0))));
+    return 0;
+  case Intrinsic::floor:
+    setValue(&I, DAG.getNode(ISD::FFLOOR, dl,
+                             getValue(I.getArgOperand(0)).getValueType(),
+                             getValue(I.getArgOperand(0))));
+    return 0;
    case Intrinsic::fma:
      setValue(&I, DAG.getNode(ISD::FMA, dl,
                               getValue(I.getArgOperand(0)).getValueType(),
@@ -4924,6 +5003,29 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                               getValue(I.getArgOperand(1)),
                               getValue(I.getArgOperand(2))));
      return 0;
+  case Intrinsic::fmuladd: {
+    EVT VT = TLI.getValueType(I.getType());
+    if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
+        TLI.isOperationLegal(ISD::FMA, VT) &&
+        TLI.isFMAFasterThanMulAndAdd(VT)){
+      setValue(&I, DAG.getNode(ISD::FMA, dl,
+                               getValue(I.getArgOperand(0)).getValueType(),
+                               getValue(I.getArgOperand(0)),
+                               getValue(I.getArgOperand(1)),
+                               getValue(I.getArgOperand(2))));
+    } else {
+      SDValue Mul = DAG.getNode(ISD::FMUL, dl,
+                                getValue(I.getArgOperand(0)).getValueType(),
+                                getValue(I.getArgOperand(0)),
+                                getValue(I.getArgOperand(1)));
+      SDValue Add = DAG.getNode(ISD::FADD, dl,
+                                getValue(I.getArgOperand(0)).getValueType(),
+                                Mul,
+                                getValue(I.getArgOperand(2)));
+      setValue(&I, Add);
+    }
+    return 0;
+  }
    case Intrinsic::convert_to_fp16:
      setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
                               MVT::i16, getValue(I.getArgOperand(0))));
@@ -5053,7 +5155,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
    }
    case Intrinsic::gcroot:
      if (GFI) {
-      const Value *Alloca = I.getArgOperand(0);
+      const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
        const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
  
        FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
@@ -5073,35 +5175,51 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      return 0;
    }
  
+  case Intrinsic::debugtrap:
    case Intrinsic::trap: {
      StringRef TrapFuncName = TM.Options.getTrapFunctionName();
      if (TrapFuncName.empty()) {
-      DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
+      ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? 
+        ISD::TRAP : ISD::DEBUGTRAP;
+      DAG.setRoot(DAG.getNode(Op, dl,MVT::Other, getRoot()));
        return 0;
      }
      TargetLowering::ArgListTy Args;
-    std::pair<SDValue, SDValue> Result =
-      TLI.LowerCallTo(getRoot(), I.getType(),
+    TargetLowering::
+    CallLoweringInfo CLI(getRoot(), I.getType(),
                   false, false, false, false, 0, CallingConv::C,
-                 /*isTailCall=*/false, /*isReturnValueUsed=*/true,
+                 /*isTailCall=*/false,
+                 /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
                   DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
                   Args, DAG, getCurDebugLoc());
+    std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
      DAG.setRoot(Result.second);
      return 0;
    }
+
    case Intrinsic::uadd_with_overflow:
-    return implVisitAluOverflow(I, ISD::UADDO);
    case Intrinsic::sadd_with_overflow:
-    return implVisitAluOverflow(I, ISD::SADDO);
    case Intrinsic::usub_with_overflow:
-    return implVisitAluOverflow(I, ISD::USUBO);
    case Intrinsic::ssub_with_overflow:
-    return implVisitAluOverflow(I, ISD::SSUBO);
    case Intrinsic::umul_with_overflow:
-    return implVisitAluOverflow(I, ISD::UMULO);
-  case Intrinsic::smul_with_overflow:
-    return implVisitAluOverflow(I, ISD::SMULO);
+  case Intrinsic::smul_with_overflow: {
+    ISD::NodeType Op;
+    switch (Intrinsic) {
+    default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
+    case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break;
+    case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break;
+    case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break;
+    case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break;
+    case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break;
+    case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break;
+    }
+    SDValue Op1 = getValue(I.getArgOperand(0));
+    SDValue Op2 = getValue(I.getArgOperand(1));
  
+    SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
+    setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
+    return 0;
+  }
    case Intrinsic::prefetch: {
      SDValue Ops[5];
      unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
@@ -5121,16 +5239,45 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                                          rw==1)); /* write */
      return 0;
    }
+  case Intrinsic::lifetime_start:
+  case Intrinsic::lifetime_end: {
+    bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
+    // Stack coloring is not enabled in O0, discard region information.
+    if (TM.getOptLevel() == CodeGenOpt::None)
+      return 0;
+
+    SmallVector<Value *, 4> Allocas;
+    GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD);
+
+    for (SmallVector<Value*, 4>::iterator Object = Allocas.begin(),
+         E = Allocas.end(); Object != E; ++Object) {
+      AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object);
+
+      // Could not find an Alloca.
+      if (!LifetimeObject)
+        continue;
  
+      int FI = FuncInfo.StaticAllocaMap[LifetimeObject];
+
+      SDValue Ops[2];
+      Ops[0] = getRoot();
+      Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true);
+      unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
+
+      Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2);
+      DAG.setRoot(Res);
+    }
+  }
    case Intrinsic::invariant_start:
-  case Intrinsic::lifetime_start:
      // Discard region information.
      setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
      return 0;
    case Intrinsic::invariant_end:
-  case Intrinsic::lifetime_end:
      // Discard region information.
      return 0;
+  case Intrinsic::donothing:
+    // ignore
+    return 0;
    }
  }
  
@@ -5149,22 +5296,21 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
  
    // Check whether the function can return without sret-demotion.
    SmallVector<ISD::OutputArg, 4> Outs;
-  SmallVector<uint64_t, 4> Offsets;
    GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
-                Outs, TLI, &Offsets);
+                Outs, TLI);
  
    bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
-                                          DAG.getMachineFunction(),
-                                          FTy->isVarArg(), Outs,
-                                          FTy->getContext());
+                                           DAG.getMachineFunction(),
+                                           FTy->isVarArg(), Outs,
+                                           FTy->getContext());
  
    SDValue DemoteStackSlot;
    int DemoteStackIdx = -100;
  
    if (!CanLowerReturn) {
-    uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
+    uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(
                        FTy->getReturnType());
-    unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(
+    unsigned Align  = TLI.getDataLayout()->getPrefTypeAlignment(
                        FTy->getReturnType());
      MachineFunction &MF = DAG.getMachineFunction();
      DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
@@ -5196,12 +5342,12 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
      Entry.Node = ArgNode; Entry.Ty = V->getType();
  
      unsigned attrInd = i - CS.arg_begin() + 1;
-    Entry.isSExt  = CS.paramHasAttr(attrInd, Attribute::SExt);
-    Entry.isZExt  = CS.paramHasAttr(attrInd, Attribute::ZExt);
-    Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
-    Entry.isSRet  = CS.paramHasAttr(attrInd, Attribute::StructRet);
-    Entry.isNest  = CS.paramHasAttr(attrInd, Attribute::Nest);
-    Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
+    Entry.isSExt  = CS.paramHasAttr(attrInd, Attributes::SExt);
+    Entry.isZExt  = CS.paramHasAttr(attrInd, Attributes::ZExt);
+    Entry.isInReg = CS.paramHasAttr(attrInd, Attributes::InReg);
+    Entry.isSRet  = CS.paramHasAttr(attrInd, Attributes::StructRet);
+    Entry.isNest  = CS.paramHasAttr(attrInd, Attributes::Nest);
+    Entry.isByVal = CS.paramHasAttr(attrInd, Attributes::ByVal);
      Entry.Alignment = CS.getParamAlignment(attrInd);
      Args.push_back(Entry);
    }
@@ -5239,15 +5385,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
    if (isTailCall && TM.Options.EnableFastISel)
      isTailCall = false;
  
-  std::pair<SDValue,SDValue> Result =
-    TLI.LowerCallTo(getRoot(), RetTy,
-                    CS.paramHasAttr(0, Attribute::SExt),
-                    CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(),
-                    CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
-                    CS.getCallingConv(),
-                    isTailCall,
-                    !CS.getInstruction()->use_empty(),
-                    Callee, Args, DAG, getCurDebugLoc());
+  TargetLowering::
+  CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG,
+                       getCurDebugLoc(), CS);
+  std::pair<SDValue,SDValue> Result = TLI.LowerCallTo(CLI);
    assert((isTailCall || Result.second.getNode()) &&
           "Non-null chain expected with non-tail call!");
    assert((Result.second.getNode() || !Result.first.getNode()) &&
@@ -5263,7 +5404,13 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
      ComputeValueVTs(TLI, PtrRetTy, PVTs);
      assert(PVTs.size() == 1 && "Pointers should fit in one register");
      EVT PtrVT = PVTs[0];
-    unsigned NumValues = Outs.size();
+
+    SmallVector<EVT, 4> RetTys;
+    SmallVector<uint64_t, 4> Offsets;
+    RetTy = FTy->getReturnType();
+    ComputeValueVTs(TLI, RetTy, RetTys, &Offsets);
+
+    unsigned NumValues = RetTys.size();
      SmallVector<SDValue, 4> Values(NumValues);
      SmallVector<SDValue, 4> Chains(NumValues);
  
@@ -5271,8 +5418,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
        SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT,
                                  DemoteStackSlot,
                                  DAG.getConstant(Offsets[i], PtrVT));
-      SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second,
-                              Add,
+      SDValue L = DAG.getLoad(RetTys[i], getCurDebugLoc(), Result.second, Add,
                    MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
                                false, false, false, 1);
        Values[i] = L;
@@ -5283,30 +5429,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
                                  MVT::Other, &Chains[0], NumValues);
      PendingLoads.push_back(Chain);
  
-    // Collect the legal value parts into potentially illegal values
-    // that correspond to the original function's return values.
-    SmallVector<EVT, 4> RetTys;
-    RetTy = FTy->getReturnType();
-    ComputeValueVTs(TLI, RetTy, RetTys);
-    ISD::NodeType AssertOp = ISD::DELETED_NODE;
-    SmallVector<SDValue, 4> ReturnValues;
-    unsigned CurReg = 0;
-    for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
-      EVT VT = RetTys[I];
-      EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT);
-      unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT);
-
-      SDValue ReturnValue =
-        getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs,
-                         RegisterVT, VT, AssertOp);
-      ReturnValues.push_back(ReturnValue);
-      CurReg += NumRegs;
-    }
-
      setValue(CS.getInstruction(),
               DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
                           DAG.getVTList(&RetTys[0], RetTys.size()),
-                         &ReturnValues[0], ReturnValues.size()));
+                         &Values[0], Values.size()));
    }
  
    // Assign order to nodes here. If the call does not produce a result, it won't
@@ -5473,6 +5599,22 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
    return false;
  }
  
+/// visitUnaryFloatCall - If a call instruction is a unary floating-point
+/// operation (as expected), translate it to an SDNode with the specified opcode
+/// and return true.
+bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
+                                              unsigned Opcode) {
+  // Sanity check that it really is a unary floating-point call.
+  if (I.getNumArgOperands() != 1 ||
+      !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
+      I.getType() != I.getArgOperand(0)->getType() ||
+      !I.onlyReadsMemory())
+    return false;
+
+  SDValue Tmp = getValue(I.getArgOperand(0));
+  setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), Tmp.getValueType(), Tmp));
+  return true;
+}
  
  void SelectionDAGBuilder::visitCall(const CallInst &I) {
    // Handle inline assembly differently.
@@ -5503,148 +5645,97 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
  
      // Check for well-known libc/libm calls.  If the function is internal, it
      // can't be a library call.
-    if (!F->hasLocalLinkage() && F->hasName()) {
-      StringRef Name = F->getName();
-      if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") ||
-          (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") ||
-          (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) {
+    LibFunc::Func Func;
+    if (!F->hasLocalLinkage() && F->hasName() &&
+        LibInfo->getLibFunc(F->getName(), Func) &&
+        LibInfo->hasOptimizedCodeGen(Func)) {
+      switch (Func) {
+      default: break;
+      case LibFunc::copysign:
+      case LibFunc::copysignf:
+      case LibFunc::copysignl:
          if (I.getNumArgOperands() == 2 &&   // Basic sanity checks.
              I.getArgOperand(0)->getType()->isFloatingPointTy() &&
              I.getType() == I.getArgOperand(0)->getType() &&
-            I.getType() == I.getArgOperand(1)->getType()) {
+            I.getType() == I.getArgOperand(1)->getType() &&
+            I.onlyReadsMemory()) {
            SDValue LHS = getValue(I.getArgOperand(0));
            SDValue RHS = getValue(I.getArgOperand(1));
            setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
                                     LHS.getValueType(), LHS, RHS));
            return;
          }
-      } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") ||
-                 (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") ||
-                 (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) {
-        if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
-            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
-            I.getType() == I.getArgOperand(0)->getType()) {
-          SDValue Tmp = getValue(I.getArgOperand(0));
-          setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
-                                   Tmp.getValueType(), Tmp));
+        break;
+      case LibFunc::fabs:
+      case LibFunc::fabsf:
+      case LibFunc::fabsl:
+        if (visitUnaryFloatCall(I, ISD::FABS))
            return;
-        }
-      } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") ||
-                 (LibInfo->has(LibFunc::sinf) && Name == "sinf") ||
-                 (LibInfo->has(LibFunc::sinl) && Name == "sinl")) {
-        if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
-            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
-            I.getType() == I.getArgOperand(0)->getType() &&
-            I.onlyReadsMemory()) {
-          SDValue Tmp = getValue(I.getArgOperand(0));
-          setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(),
-                                   Tmp.getValueType(), Tmp));
+        break;
+      case LibFunc::sin:
+      case LibFunc::sinf:
+      case LibFunc::sinl:
+        if (visitUnaryFloatCall(I, ISD::FSIN))
            return;
-        }
-      } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") ||
-                 (LibInfo->has(LibFunc::cosf) && Name == "cosf") ||
-                 (LibInfo->has(LibFunc::cosl) && Name == "cosl")) {
-        if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
-            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
-            I.getType() == I.getArgOperand(0)->getType() &&
-            I.onlyReadsMemory()) {
-          SDValue Tmp = getValue(I.getArgOperand(0));
-          setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(),
-                                   Tmp.getValueType(), Tmp));
+        break;
+      case LibFunc::cos:
+      case LibFunc::cosf:
+      case LibFunc::cosl:
+        if (visitUnaryFloatCall(I, ISD::FCOS))
            return;
-        }
-      } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") ||
-                 (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") ||
-                 (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) {
-        if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
-            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
-            I.getType() == I.getArgOperand(0)->getType() &&
-            I.onlyReadsMemory()) {
-          SDValue Tmp = getValue(I.getArgOperand(0));
-          setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(),
-                                   Tmp.getValueType(), Tmp));
+        break;
+      case LibFunc::sqrt:
+      case LibFunc::sqrtf:
+      case LibFunc::sqrtl:
+        if (visitUnaryFloatCall(I, ISD::FSQRT))
            return;
-        }
-      } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") ||
-                 (LibInfo->has(LibFunc::floorf) && Name == "floorf") ||
-                 (LibInfo->has(LibFunc::floorl) && Name == "floorl")) {
-        if (I.getNumArgOperands() == 1 && // Basic sanity checks.
-            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
-            I.getType() == I.getArgOperand(0)->getType()) {
-          SDValue Tmp = getValue(I.getArgOperand(0));
-          setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(),
-                                   Tmp.getValueType(), Tmp));
+        break;
+      case LibFunc::floor:
+      case LibFunc::floorf:
+      case LibFunc::floorl:
+        if (visitUnaryFloatCall(I, ISD::FFLOOR))
            return;
-        }
-      } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") ||
-                 (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") ||
-                 (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) {
-        if (I.getNumArgOperands() == 1 && // Basic sanity checks.
-            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
-            I.getType() == I.getArgOperand(0)->getType()) {
-          SDValue Tmp = getValue(I.getArgOperand(0));
-          setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(),
-                                   Tmp.getValueType(), Tmp));
+        break;
+      case LibFunc::nearbyint:
+      case LibFunc::nearbyintf:
+      case LibFunc::nearbyintl:
+        if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
            return;
-        }
-      } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") ||
-                 (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") ||
-                 (LibInfo->has(LibFunc::ceill) && Name == "ceill")) {
-        if (I.getNumArgOperands() == 1 && // Basic sanity checks.
-            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
-            I.getType() == I.getArgOperand(0)->getType()) {
-          SDValue Tmp = getValue(I.getArgOperand(0));
-          setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(),
-                                   Tmp.getValueType(), Tmp));
+        break;
+      case LibFunc::ceil:
+      case LibFunc::ceilf:
+      case LibFunc::ceill:
+        if (visitUnaryFloatCall(I, ISD::FCEIL))
            return;
-        }
-      } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") ||
-                 (LibInfo->has(LibFunc::rintf) && Name == "rintf") ||
-                 (LibInfo->has(LibFunc::rintl) && Name == "rintl")) {
-        if (I.getNumArgOperands() == 1 && // Basic sanity checks.
-            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
-            I.getType() == I.getArgOperand(0)->getType()) {
-          SDValue Tmp = getValue(I.getArgOperand(0));
-          setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(),
-                                   Tmp.getValueType(), Tmp));
+        break;
+      case LibFunc::rint:
+      case LibFunc::rintf:
+      case LibFunc::rintl:
+        if (visitUnaryFloatCall(I, ISD::FRINT))
            return;
-        }
-      } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") ||
-                 (LibInfo->has(LibFunc::truncf) && Name == "truncf") ||
-                 (LibInfo->has(LibFunc::truncl) && Name == "truncl")) {
-        if (I.getNumArgOperands() == 1 && // Basic sanity checks.
-            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
-            I.getType() == I.getArgOperand(0)->getType()) {
-          SDValue Tmp = getValue(I.getArgOperand(0));
-          setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(),
-                                   Tmp.getValueType(), Tmp));
+        break;
+      case LibFunc::trunc:
+      case LibFunc::truncf:
+      case LibFunc::truncl:
+        if (visitUnaryFloatCall(I, ISD::FTRUNC))
            return;
-        }
-      } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") ||
-                 (LibInfo->has(LibFunc::log2f) && Name == "log2f") ||
-                 (LibInfo->has(LibFunc::log2l) && Name == "log2l")) {
-        if (I.getNumArgOperands() == 1 && // Basic sanity checks.
-            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
-            I.getType() == I.getArgOperand(0)->getType()) {
-          SDValue Tmp = getValue(I.getArgOperand(0));
-          setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(),
-                                   Tmp.getValueType(), Tmp));
+        break;
+      case LibFunc::log2:
+      case LibFunc::log2f:
+      case LibFunc::log2l:
+        if (visitUnaryFloatCall(I, ISD::FLOG2))
            return;
-        }
-      } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") ||
-                 (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") ||
-                 (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) {
-        if (I.getNumArgOperands() == 1 && // Basic sanity checks.
-            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
-            I.getType() == I.getArgOperand(0)->getType()) {
-          SDValue Tmp = getValue(I.getArgOperand(0));
-          setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(),
-                                   Tmp.getValueType(), Tmp));
+        break;
+      case LibFunc::exp2:
+      case LibFunc::exp2f:
+      case LibFunc::exp2l:
+        if (visitUnaryFloatCall(I, ISD::FEXP2))
            return;
-        }
-      } else if (Name == "memcmp") {
+        break;
+      case LibFunc::memcmp:
          if (visitMemCmpCall(I))
            return;
+        break;
        }
      }
    }
@@ -5684,7 +5775,7 @@ public:
    /// MVT::Other.
    EVT getCallOperandValEVT(LLVMContext &Context,
                             const TargetLowering &TLI,
-                           const TargetData *TD) const {
+                           const DataLayout *TD) const {
      if (CallOperandVal == 0) return MVT::Other;
  
      if (isa<BasicBlock>(CallOperandVal))
@@ -5941,11 +6032,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
        SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
  
        if (OpInfo.ConstraintVT != Input.ConstraintVT) {
-       std::pair<unsigned, const TargetRegisterClass*> MatchRC =
-         TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+        std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+          TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
                                             OpInfo.ConstraintVT);
-       std::pair<unsigned, const TargetRegisterClass*> InputRC =
-         TLI.getRegForInlineAsmConstraint(Input.ConstraintCode,
+        std::pair<unsigned, const TargetRegisterClass*> InputRC =
+          TLI.getRegForInlineAsmConstraint(Input.ConstraintCode,
                                             Input.ConstraintVT);
          if ((OpInfo.ConstraintVT.isInteger() !=
               Input.ConstraintVT.isInteger()) ||
@@ -5988,8 +6079,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
          // Otherwise, create a stack slot and emit a store to it before the
          // asm.
          Type *Ty = OpVal->getType();
-        uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
-        unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+        uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
+        unsigned Align  = TLI.getDataLayout()->getPrefTypeAlignment(Ty);
          MachineFunction &MF = DAG.getMachineFunction();
          int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
          SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
@@ -6037,12 +6128,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
    const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
    AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
  
-  // Remember the HasSideEffect and AlignStack bits as operand 3.
+  // Remember the HasSideEffect, AlignStack and AsmDialect bits as operand 3.
    unsigned ExtraInfo = 0;
    if (IA->hasSideEffects())
      ExtraInfo |= InlineAsm::Extra_HasSideEffects;
    if (IA->isAlignStack())
      ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+  // Set the asm dialect.
+  ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
    AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo,
                                                    TLI.getPointerTy()));
  
@@ -6152,7 +6245,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
  
            // Use the produced MatchedRegs object to
            MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
-                                    Chain, &Flag);
+                                    Chain, &Flag, CS.getInstruction());
            MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
                                             true, OpInfo.getMatchedOperand(),
                                             DAG, AsmNodeOperands);
@@ -6214,8 +6307,15 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
        assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
                OpInfo.ConstraintType == TargetLowering::C_Register) &&
               "Unknown constraint type!");
-      assert(!OpInfo.isIndirect &&
-             "Don't know how to handle indirect register inputs yet!");
+
+      // TODO: Support this.
+      if (OpInfo.isIndirect) {
+        LLVMContext &Ctx = *DAG.getContext();
+        Ctx.emitError(CS.getInstruction(),
+                      "Don't know how to handle indirect register inputs yet "
+                      "for constraint '" + Twine(OpInfo.ConstraintCode) + "'");
+        break;
+      }
  
        // Copy the input into the appropriate registers.
        if (OpInfo.AssignedRegs.Regs.empty()) {
@@ -6227,7 +6327,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
        }
  
        OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
-                                        Chain, &Flag);
+                                        Chain, &Flag, CS.getInstruction());
  
        OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
                                                 DAG, AsmNodeOperands);
@@ -6258,7 +6358,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
    // and set it as the value of the call.
    if (!RetValRegs.Regs.empty()) {
      SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
-                                             Chain, &Flag);
+                                             Chain, &Flag, CS.getInstruction());
  
      // FIXME: Why don't we do this for inline asms with MRVs?
      if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
@@ -6298,7 +6398,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
      RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
      const Value *Ptr = IndirectStoresToEmit[i].second;
      SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
-                                             Chain, &Flag);
+                                             Chain, &Flag, IA);
      StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
    }
  
@@ -6328,7 +6428,7 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
  }
  
  void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
-  const TargetData &TD = *TLI.getTargetData();
+  const DataLayout &TD = *TLI.getDataLayout();
    SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
                             getRoot(), getValue(I.getOperand(0)),
                             DAG.getSrcValue(I.getOperand(0)),
@@ -6358,29 +6458,23 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
  /// FIXME: When all targets are
  /// migrated to using LowerCall, this hook should be integrated into SDISel.
  std::pair<SDValue, SDValue>
-TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
-                            bool RetSExt, bool RetZExt, bool isVarArg,
-                            bool isInreg, unsigned NumFixedArgs,
-                            CallingConv::ID CallConv, bool isTailCall,
-                            bool isReturnValueUsed,
-                            SDValue Callee,
-                            ArgListTy &Args, SelectionDAG &DAG,
-                            DebugLoc dl) const {
+TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
    // Handle all of the outgoing arguments.
-  SmallVector<ISD::OutputArg, 32> Outs;
-  SmallVector<SDValue, 32> OutVals;
+  CLI.Outs.clear();
+  CLI.OutVals.clear();
+  ArgListTy &Args = CLI.Args;
    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
      SmallVector<EVT, 4> ValueVTs;
      ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
      for (unsigned Value = 0, NumValues = ValueVTs.size();
           Value != NumValues; ++Value) {
        EVT VT = ValueVTs[Value];
-      Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
+      Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
        SDValue Op = SDValue(Args[i].Node.getNode(),
                             Args[i].Node.getResNo() + Value);
        ISD::ArgFlagsTy Flags;
        unsigned OriginalAlignment =
-        getTargetData()->getABITypeAlignment(ArgTy);
+        getDataLayout()->getABITypeAlignment(ArgTy);
  
        if (Args[i].isZExt)
          Flags.setZExt();
@@ -6394,7 +6488,7 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
          Flags.setByVal();
          PointerType *Ty = cast<PointerType>(Args[i].Ty);
          Type *ElementTy = Ty->getElementType();
-        Flags.setByValSize(getTargetData()->getTypeAllocSize(ElementTy));
+        Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy));
          // For ByVal, alignment should come from FE.  BE will guess if this
          // info is not there but there are cases it cannot get right.
          unsigned FrameAlign;
@@ -6408,8 +6502,8 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
          Flags.setNest();
        Flags.setOrigAlign(OriginalAlignment);
  
-      EVT PartVT = getRegisterType(RetTy->getContext(), VT);
-      unsigned NumParts = getNumRegisters(RetTy->getContext(), VT);
+      EVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
+      unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT);
        SmallVector<SDValue, 4> Parts(NumParts);
        ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
  
@@ -6418,90 +6512,89 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
        else if (Args[i].isZExt)
          ExtendKind = ISD::ZERO_EXTEND;
  
-      getCopyToParts(DAG, dl, Op, &Parts[0], NumParts,
-                     PartVT, ExtendKind);
+      getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts,
+                     PartVT, CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind);
  
        for (unsigned j = 0; j != NumParts; ++j) {
          // if it isn't first piece, alignment must be 1
          ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(),
-                               i < NumFixedArgs);
+                               i < CLI.NumFixedArgs);
          if (NumParts > 1 && j == 0)
            MyFlags.Flags.setSplit();
          else if (j != 0)
            MyFlags.Flags.setOrigAlign(1);
  
-        Outs.push_back(MyFlags);
-        OutVals.push_back(Parts[j]);
+        CLI.Outs.push_back(MyFlags);
+        CLI.OutVals.push_back(Parts[j]);
        }
      }
    }
  
    // Handle the incoming return values from the call.
-  SmallVector<ISD::InputArg, 32> Ins;
+  CLI.Ins.clear();
    SmallVector<EVT, 4> RetTys;
-  ComputeValueVTs(*this, RetTy, RetTys);
+  ComputeValueVTs(*this, CLI.RetTy, RetTys);
    for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
      EVT VT = RetTys[I];
-    EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
-    unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
+    EVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+    unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
      for (unsigned i = 0; i != NumRegs; ++i) {
        ISD::InputArg MyFlags;
        MyFlags.VT = RegisterVT.getSimpleVT();
-      MyFlags.Used = isReturnValueUsed;
-      if (RetSExt)
+      MyFlags.Used = CLI.IsReturnValueUsed;
+      if (CLI.RetSExt)
          MyFlags.Flags.setSExt();
-      if (RetZExt)
+      if (CLI.RetZExt)
          MyFlags.Flags.setZExt();
-      if (isInreg)
+      if (CLI.IsInReg)
          MyFlags.Flags.setInReg();
-      Ins.push_back(MyFlags);
+      CLI.Ins.push_back(MyFlags);
      }
    }
  
    SmallVector<SDValue, 4> InVals;
-  Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall,
-                    Outs, OutVals, Ins, dl, DAG, InVals);
+  CLI.Chain = LowerCall(CLI, InVals);
  
    // Verify that the target's LowerCall behaved as expected.
-  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
+  assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other &&
           "LowerCall didn't return a valid chain!");
-  assert((!isTailCall || InVals.empty()) &&
+  assert((!CLI.IsTailCall || InVals.empty()) &&
           "LowerCall emitted a return value for a tail call!");
-  assert((isTailCall || InVals.size() == Ins.size()) &&
+  assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) &&
           "LowerCall didn't emit the correct number of values!");
  
    // For a tail call, the return value is merely live-out and there aren't
    // any nodes in the DAG representing it. Return a special value to
    // indicate that a tail call has been emitted and no more Instructions
    // should be processed in the current block.
-  if (isTailCall) {
-    DAG.setRoot(Chain);
+  if (CLI.IsTailCall) {
+    CLI.DAG.setRoot(CLI.Chain);
      return std::make_pair(SDValue(), SDValue());
    }
  
-  DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+  DEBUG(for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
            assert(InVals[i].getNode() &&
                   "LowerCall emitted a null value!");
-          assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
+          assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
                   "LowerCall emitted a value with the wrong type!");
          });
  
    // Collect the legal value parts into potentially illegal values
    // that correspond to the original function's return values.
    ISD::NodeType AssertOp = ISD::DELETED_NODE;
-  if (RetSExt)
+  if (CLI.RetSExt)
      AssertOp = ISD::AssertSext;
-  else if (RetZExt)
+  else if (CLI.RetZExt)
      AssertOp = ISD::AssertZext;
    SmallVector<SDValue, 4> ReturnValues;
    unsigned CurReg = 0;
    for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
      EVT VT = RetTys[I];
-    EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
-    unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
+    EVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
+    unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
  
-    ReturnValues.push_back(getCopyFromParts(DAG, dl, &InVals[CurReg],
-                                            NumRegs, RegisterVT, VT,
+    ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
+                                            NumRegs, RegisterVT, VT, NULL,
                                              AssertOp));
      CurReg += NumRegs;
    }
@@ -6510,12 +6603,12 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
    // such a node, so we just return a null return value in that case. In
    // that case, nothing will actually look at the value.
    if (ReturnValues.empty())
-    return std::make_pair(SDValue(), Chain);
+    return std::make_pair(SDValue(), CLI.Chain);
  
-  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
-                            DAG.getVTList(&RetTys[0], RetTys.size()),
+  SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
+                                CLI.DAG.getVTList(&RetTys[0], RetTys.size()),
                              &ReturnValues[0], ReturnValues.size());
-  return std::make_pair(Res, Chain);
+  return std::make_pair(Res, CLI.Chain);
  }
  
  void TargetLowering::LowerOperationWrapper(SDNode *N,
@@ -6540,7 +6633,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
  
    RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
    SDValue Chain = DAG.getEntryNode();
-  RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
+  RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0, V);
    PendingExports.push_back(Chain);
  }
  
@@ -6570,7 +6663,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
    const Function &F = *LLVMBB->getParent();
    SelectionDAG &DAG = SDB->DAG;
    DebugLoc dl = SDB->getCurDebugLoc();
-  const TargetData *TD = TLI.getTargetData();
+  const DataLayout *TD = TLI.getDataLayout();
    SmallVector<ISD::InputArg, 16> Ins;
  
    // Check whether the function can return without sret-demotion.
@@ -6588,7 +6681,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
      ISD::ArgFlagsTy Flags;
      Flags.setSRet();
      EVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]);
-    ISD::InputArg RetArg(Flags, RegisterVT, true);
+    ISD::InputArg RetArg(Flags, RegisterVT, true, 0, 0);
      Ins.push_back(RetArg);
    }
  
@@ -6607,15 +6700,15 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
        unsigned OriginalAlignment =
          TD->getABITypeAlignment(ArgTy);
  
-      if (F.paramHasAttr(Idx, Attribute::ZExt))
+      if (F.getParamAttributes(Idx).hasAttribute(Attributes::ZExt))
          Flags.setZExt();
-      if (F.paramHasAttr(Idx, Attribute::SExt))
+      if (F.getParamAttributes(Idx).hasAttribute(Attributes::SExt))
          Flags.setSExt();
-      if (F.paramHasAttr(Idx, Attribute::InReg))
+      if (F.getParamAttributes(Idx).hasAttribute(Attributes::InReg))
          Flags.setInReg();
-      if (F.paramHasAttr(Idx, Attribute::StructRet))
+      if (F.getParamAttributes(Idx).hasAttribute(Attributes::StructRet))
          Flags.setSRet();
-      if (F.paramHasAttr(Idx, Attribute::ByVal)) {
+      if (F.getParamAttributes(Idx).hasAttribute(Attributes::ByVal)) {
          Flags.setByVal();
          PointerType *Ty = cast<PointerType>(I->getType());
          Type *ElementTy = Ty->getElementType();
@@ -6629,14 +6722,15 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
            FrameAlign = TLI.getByValTypeAlignment(ElementTy);
          Flags.setByValAlign(FrameAlign);
        }
-      if (F.paramHasAttr(Idx, Attribute::Nest))
+      if (F.getParamAttributes(Idx).hasAttribute(Attributes::Nest))
          Flags.setNest();
        Flags.setOrigAlign(OriginalAlignment);
  
        EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
        unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT);
        for (unsigned i = 0; i != NumRegs; ++i) {
-        ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed);
+        ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed,
+                              Idx-1, i*RegisterVT.getStoreSize());
          if (NumRegs > 1 && i == 0)
            MyFlags.Flags.setSplit();
          // if it isn't first piece, alignment must be 1
@@ -6682,7 +6776,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
      EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
      ISD::NodeType AssertOp = ISD::DELETED_NODE;
      SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
-                                        RegVT, VT, AssertOp);
+                                        RegVT, VT, NULL, AssertOp);
  
      MachineFunction& MF = SDB->DAG.getMachineFunction();
      MachineRegisterInfo& RegInfo = MF.getRegInfo();
@@ -6716,14 +6810,14 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
  
        if (!I->use_empty()) {
          ISD::NodeType AssertOp = ISD::DELETED_NODE;
-        if (F.paramHasAttr(Idx, Attribute::SExt))
+        if (F.getParamAttributes(Idx).hasAttribute(Attributes::SExt))
            AssertOp = ISD::AssertSext;
-        else if (F.paramHasAttr(Idx, Attribute::ZExt))
+        else if (F.getParamAttributes(Idx).hasAttribute(Attributes::ZExt))
            AssertOp = ISD::AssertZext;
  
          ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
                                               NumParts, PartVT, VT,
-                                             AssertOp));
+                                             NULL, AssertOp));
        }
  
        i += NumParts;
@@ -6735,7 +6829,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
  
      // Note down frame index.
      if (FrameIndexSDNode *FI =
-       dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
+        dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
        FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
  
      SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues,