Create and use an llvm.eh.sjlj.functioncontext intrinsic.

[oota-llvm.git] / lib / CodeGen / SelectionDAG / SelectionDAGBuilder.cpp
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

index 48a2dc58da28384fa4a2ff6a9ba644389b56f172..b2b93c9ff57a1dba845cc85a216e0bc9a0e6cee7 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -50,7 +50,6 @@
  #include "llvm/Target/TargetIntrinsicInfo.h"
  #include "llvm/Target/TargetLowering.h"
  #include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/Compiler.h"
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/Debug.h"
  #include "llvm/Support/ErrorHandling.h"
@@ -84,9 +83,7 @@ LimitFPPrecision("limit-float-precision",
  // %buffer = alloca [4096 x i8]
  // %data = load [4096 x i8]* %argPtr
  // store [4096 x i8] %data, [4096 x i8]* %buffer
-static cl::opt<unsigned>
-MaxParallelChains("dag-chain-limit", cl::desc("Max parallel isel dag chains"),
-                  cl::init(64), cl::Hidden);
+static const unsigned MaxParallelChains = 64;
  
  static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
                                        const SDValue *Parts, unsigned NumParts,
@@ -283,12 +280,35 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
      }
  
      // Vector/Vector bitcast.
-    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+    if (ValueVT.getSizeInBits() == PartVT.getSizeInBits())
+      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+    assert(PartVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
+      "Cannot handle this kind of promotion");
+    // Promoted vector extract
+    bool Smaller = ValueVT.bitsLE(PartVT);
+    return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+                       DL, ValueVT, Val);
+
    }
  
-  assert(ValueVT.getVectorElementType() == PartVT &&
-         ValueVT.getVectorNumElements() == 1 &&
+  // Trivial bitcast if the types are the same size and the destination
+  // vector type is legal.
+  if (PartVT.getSizeInBits() == ValueVT.getSizeInBits() &&
+      TLI.isTypeLegal(ValueVT))
+    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+  // Handle cases such as i8 -> <1 x i1>
+  assert(ValueVT.getVectorNumElements() == 1 &&
           "Only trivial scalar-to-vector conversions should get here!");
+
+  if (ValueVT.getVectorNumElements() == 1 &&
+      ValueVT.getVectorElementType() != PartVT) {
+    bool Smaller = ValueVT.bitsLE(PartVT);
+    Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+                       DL, ValueVT.getScalarType(), Val);
+  }
+
    return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
  }
  
@@ -429,7 +449,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
        // Bitconvert vector->vector case.
        Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
      } else if (PartVT.isVector() &&
-               PartVT.getVectorElementType() == ValueVT.getVectorElementType()&&
+               PartVT.getVectorElementType() == ValueVT.getVectorElementType() &&
                 PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
        EVT ElementVT = PartVT.getVectorElementType();
        // Vector widening case, e.g. <2 x float> -> <4 x float>.  Shuffle in
@@ -449,13 +469,25 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
  
        //SDValue UndefElts = DAG.getUNDEF(VectorTy);
        //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
-    } else {
+    } else if (PartVT.isVector() &&
+               PartVT.getVectorElementType().bitsGE(
+                 ValueVT.getVectorElementType()) &&
+               PartVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
+
+      // Promoted vector extract
+      bool Smaller = PartVT.bitsLE(ValueVT);
+      Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+                        DL, PartVT, Val);
+    } else{
        // Vector -> scalar conversion.
-      assert(ValueVT.getVectorElementType() == PartVT &&
-             ValueVT.getVectorNumElements() == 1 &&
+      assert(ValueVT.getVectorNumElements() == 1 &&
               "Only trivial vector-to-scalar conversions should get here!");
        Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
                          PartVT, Val, DAG.getIntPtrConstant(0));
+
+      bool Smaller = ValueVT.bitsLE(PartVT);
+      Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+                         DL, PartVT, Val);
      }
  
      Parts[0] = Val;
@@ -546,7 +578,7 @@ namespace {
        : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
  
      RegsForValue(LLVMContext &Context, const TargetLowering &tli,
-                 unsigned Reg, const Type *Ty) {
+                 unsigned Reg, Type *Ty) {
        ComputeValueVTs(tli, Ty, ValueVTs);
  
        for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
@@ -641,16 +673,17 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
        // If the source register was virtual and if we know something about it,
        // add an assert node.
        if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
-          !RegisterVT.isInteger() || RegisterVT.isVector() ||
-          !FuncInfo.LiveOutRegInfo.inBounds(Regs[Part+i]))
+          !RegisterVT.isInteger() || RegisterVT.isVector())
+        continue;
+
+      const FunctionLoweringInfo::LiveOutInfo *LOI =
+        FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
+      if (!LOI)
          continue;
-      
-      const FunctionLoweringInfo::LiveOutInfo &LOI =
-        FuncInfo.LiveOutRegInfo[Regs[Part+i]];
  
        unsigned RegSize = RegisterVT.getSizeInBits();
-      unsigned NumSignBits = LOI.NumSignBits;
-      unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
+      unsigned NumSignBits = LOI->NumSignBits;
+      unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes();
  
        // FIXME: We capture more information than the dag can represent.  For
        // now, just use the tightest assertzext/assertsext possible.
@@ -785,11 +818,20 @@ void SelectionDAGBuilder::clear() {
    UnusedArgNodeMap.clear();
    PendingLoads.clear();
    PendingExports.clear();
-  DanglingDebugInfoMap.clear();
    CurDebugLoc = DebugLoc();
    HasTailCall = false;
  }
  
+/// clearDanglingDebugInfo - Clear the dangling debug information
+/// map. This function is separated from the clear so that debug
+/// information that is dangling in a basic block can be properly
+/// resolved in a different basic block. This allows the
+/// SelectionDAG to resolve dangling debug information attached
+/// to PHI nodes.
+void SelectionDAGBuilder::clearDanglingDebugInfo() {
+  DanglingDebugInfoMap.clear();
+}
+
  /// getRoot - Return the current virtual root of the Selection DAG,
  /// flushing any PendingLoad items. This must be done before emitting
  /// a store or any other node that may need to be ordered after any
@@ -908,7 +950,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
                                Val.getResNo(), Offset, dl, DbgSDNodeOrder);
          DAG.AddDbgValue(SDV, Val.getNode(), false);
        }
-    } else 
+    } else
        DEBUG(dbgs() << "Dropping debug info for " << DI);
      DanglingDebugInfoMap[V] = DanglingDebugInfo();
    }
@@ -1027,7 +1069,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
      if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
        return DAG.getBlockAddress(BA, VT);
  
-    const VectorType *VecTy = cast<VectorType>(V->getType());
+    VectorType *VecTy = cast<VectorType>(V->getType());
      unsigned NumElements = VecTy->getNumElements();
  
      // Now that we know the number and type of the elements, get that number of
@@ -1129,15 +1171,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
          else if (F->paramHasAttr(0, Attribute::ZExt))
            ExtendKind = ISD::ZERO_EXTEND;
  
-        // FIXME: C calling convention requires the return type to be promoted
-        // to at least 32-bit. But this is not necessary for non-C calling
-        // conventions. The frontend should mark functions whose return values
-        // require promoting with signext or zeroext attributes.
-        if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
-          EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32);
-          if (VT.bitsLT(MinVT))
-            VT = MinVT;
-        }
+        if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
+          VT = TLI.getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind);
  
          unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
          EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
@@ -1152,9 +1187,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
            Flags.setInReg();
  
          // Propagate extension type if any
-        if (F->paramHasAttr(0, Attribute::SExt))
+        if (ExtendKind == ISD::SIGN_EXTEND)
            Flags.setSExt();
-        else if (F->paramHasAttr(0, Attribute::ZExt))
+        else if (ExtendKind == ISD::ZERO_EXTEND)
            Flags.setZExt();
  
          for (unsigned i = 0; i < NumParts; ++i) {
@@ -1184,6 +1219,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
  /// created for it, emit nodes to copy the value into the virtual
  /// registers.
  void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
+  // Skip empty types
+  if (V->getType()->isEmptyTy())
+    return;
+
    DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
    if (VMI != FuncInfo.ValueMap.end()) {
      assert(!V->use_empty() && "Unused value assigned virtual registers!");
@@ -1232,6 +1271,26 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
    return true;
  }
  
+/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
+uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src,
+                                            MachineBasicBlock *Dst) {
+  BranchProbabilityInfo *BPI = FuncInfo.BPI;
+  if (!BPI)
+    return 0;
+  const BasicBlock *SrcBB = Src->getBasicBlock();
+  const BasicBlock *DstBB = Dst->getBasicBlock();
+  return BPI->getEdgeWeight(SrcBB, DstBB);
+}
+
+void SelectionDAGBuilder::
+addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
+                       uint32_t Weight /* = 0 */) {
+  if (!Weight)
+    Weight = getEdgeWeight(Src, Dst);
+  Src->addSuccessor(Dst, Weight);
+}
+
+
  static bool InBlock(const Value *V, const BasicBlock *BB) {
    if (const Instruction *I = dyn_cast<Instruction>(V))
      return I->getParent() == BB;
@@ -1417,7 +1476,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
    //     jle foo
    //
    if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
-    if (!TLI.isJumpExpensive() && 
+    if (!TLI.isJumpExpensive() &&
          BOp->hasOneUse() &&
          (BOp->getOpcode() == Instruction::And ||
           BOp->getOpcode() == Instruction::Or)) {
@@ -1501,8 +1560,8 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
    }
  
    // Update successor info
-  SwitchBB->addSuccessor(CB.TrueBB);
-  SwitchBB->addSuccessor(CB.FalseBB);
+  addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight);
+  addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
  
    // Set NextBlock to be the MBB immediately after the current one, if any.
    // This is used to avoid emitting unnecessary branches to the next block.
@@ -1646,8 +1705,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
  
    MachineBasicBlock* MBB = B.Cases[0].ThisBB;
  
-  SwitchBB->addSuccessor(B.Default);
-  SwitchBB->addSuccessor(MBB);
+  addSuccessorWithWeight(SwitchBB, B.Default);
+  addSuccessorWithWeight(SwitchBB, MBB);
  
    SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
                                  MVT::Other, CopyTo, RangeCmp,
@@ -1670,7 +1729,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
    SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
                                         Reg, VT);
    SDValue Cmp;
-  if (CountPopulation_64(B.Mask) == 1) {
+  unsigned PopCount = CountPopulation_64(B.Mask);
+  if (PopCount == 1) {
      // Testing for a single bit; just compare the shift count with what it
      // would need to be to shift a 1 bit in that position.
      Cmp = DAG.getSetCC(getCurDebugLoc(),
@@ -1678,6 +1738,13 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
                         ShiftOp,
                         DAG.getConstant(CountTrailingZeros_64(B.Mask), VT),
                         ISD::SETEQ);
+  } else if (PopCount == BB.Range) {
+    // There is only one zero bit in the range, test for it directly.
+    Cmp = DAG.getSetCC(getCurDebugLoc(),
+                       TLI.getSetCCResultType(VT),
+                       ShiftOp,
+                       DAG.getConstant(CountTrailingOnes_64(B.Mask), VT),
+                       ISD::SETNE);
    } else {
      // Make desired shift
      SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT,
@@ -1692,8 +1759,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
                         ISD::SETNE);
    }
  
-  SwitchBB->addSuccessor(B.TargetBB);
-  SwitchBB->addSuccessor(NextMBB);
+  addSuccessorWithWeight(SwitchBB, B.TargetBB);
+  addSuccessorWithWeight(SwitchBB, NextMBB);
  
    SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
                                MVT::Other, getControlRoot(),
@@ -1743,6 +1810,49 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
  void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) {
  }
  
+void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
+  llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
+}
+
+void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
+  assert(FuncInfo.MBB->isLandingPad() &&
+         "Call to landingpad not in landing pad!");
+
+  MachineBasicBlock *MBB = FuncInfo.MBB;
+  MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+  AddLandingPadInfo(LP, MMI, MBB);
+
+  SmallVector<EVT, 2> ValueVTs;
+  ComputeValueVTs(TLI, LP.getType(), ValueVTs);
+
+  // Insert the EXCEPTIONADDR instruction.
+  assert(FuncInfo.MBB->isLandingPad() &&
+         "Call to eh.exception not in landing pad!");
+  SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+  SDValue Ops[2];
+  Ops[0] = DAG.getRoot();
+  SDValue Op1 = DAG.getNode(ISD::EXCEPTIONADDR, getCurDebugLoc(), VTs, Ops, 1);
+  SDValue Chain = Op1.getValue(1);
+
+  // Insert the EHSELECTION instruction.
+  VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+  Ops[0] = Op1;
+  Ops[1] = Chain;
+  SDValue Op2 = DAG.getNode(ISD::EHSELECTION, getCurDebugLoc(), VTs, Ops, 2);
+  Chain = Op2.getValue(1);
+  Op2 = DAG.getSExtOrTrunc(Op2, getCurDebugLoc(), MVT::i32);
+
+  Ops[0] = Op1;
+  Ops[1] = Op2;
+  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                            DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
+                            &Ops[0], 2);
+
+  std::pair<SDValue, SDValue> RetPair = std::make_pair(Res, Chain);
+  setValue(&LP, RetPair.first);
+  DAG.setRoot(RetPair.second);
+}
+
  /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for
  /// small case ranges).
  bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
@@ -1801,8 +1911,8 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
                                      ISD::SETEQ);
  
          // Update successor info.
-        SwitchBB->addSuccessor(Small.BB);
-        SwitchBB->addSuccessor(Default);
+        addSuccessorWithWeight(SwitchBB, Small.BB);
+        addSuccessorWithWeight(SwitchBB, Default);
  
          // Insert the true branch.
          SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
@@ -1858,7 +1968,11 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
        CC = ISD::SETLE;
        LHS = I->Low; MHS = SV; RHS = I->High;
      }
-    CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock);
+
+    uint32_t ExtraWeight = I->ExtraWeight;
+    CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough,
+                 /* me */ CurBlock,
+                 /* trueweight */ ExtraWeight / 2, /* falseweight */ ExtraWeight / 2);
  
      // If emitting the first comparison, just call visitSwitchCase to emit the
      // code into the current block.  Otherwise, push the CaseBlock onto the
@@ -1888,10 +2002,10 @@ static APInt ComputeRange(const APInt &First, const APInt &Last) {
  }
  
  /// handleJTSwitchCase - Emit jumptable for current switch case range
-bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
-                                             CaseRecVector& WorkList,
-                                             const Value* SV,
-                                             MachineBasicBlock* Default,
+bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
+                                             CaseRecVector &WorkList,
+                                             const Value *SV,
+                                             MachineBasicBlock *Default,
                                               MachineBasicBlock *SwitchBB) {
    Case& FrontCase = *CR.Range.first;
    Case& BackCase  = *(CR.Range.second-1);
@@ -1900,8 +2014,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
    const APInt &Last  = cast<ConstantInt>(BackCase.High)->getValue();
  
    APInt TSize(First.getBitWidth(), 0);
-  for (CaseItr I = CR.Range.first, E = CR.Range.second;
-       I!=E; ++I)
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
      TSize += I->size();
  
    if (!areJTsAllowed(TLI) || TSize.ult(4))
@@ -1915,7 +2028,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
    DEBUG(dbgs() << "Lowering jump table\n"
                 << "First entry: " << First << ". Last entry: " << Last << '\n'
                 << "Range: " << Range
-               << "Size: " << TSize << ". Density: " << Density << "\n\n");
+               << ". Size: " << TSize << ". Density: " << Density << "\n\n");
  
    // Get the MachineFunction which holds the current MBB.  This is used when
    // inserting any additional MBBs necessary to represent the switch.
@@ -1933,8 +2046,9 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
    // table.
    MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
    CurMF->insert(BBI, JumpTableBB);
-  CR.CaseBB->addSuccessor(Default);
-  CR.CaseBB->addSuccessor(JumpTableBB);
+
+  addSuccessorWithWeight(CR.CaseBB, Default);
+  addSuccessorWithWeight(CR.CaseBB, JumpTableBB);
  
    // Build a vector of destination BBs, corresponding to each target
    // of the jump table. If the value of the jump table slot corresponds to
@@ -1961,7 +2075,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
           E = DestBBs.end(); I != E; ++I) {
      if (!SuccsHandled[(*I)->getNumber()]) {
        SuccsHandled[(*I)->getNumber()] = true;
-      JumpTableBB->addSuccessor(*I);
+      addSuccessorWithWeight(JumpTableBB, *I);
      }
    }
  
@@ -1978,7 +2092,6 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
      visitJumpTableHeader(JT, JTH, SwitchBB);
  
    JTCases.push_back(JumpTableBlock(JTH, JT));
-
    return true;
  }
  
@@ -2028,9 +2141,13 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
      APInt Range = ComputeRange(LEnd, RBegin);
      assert((Range - 2ULL).isNonNegative() &&
             "Invalid case distance");
-    double LDensity = (double)LSize.roundToDouble() /
+    // Use volatile double here to avoid excess precision issues on some hosts,
+    // e.g. that use 80-bit X87 registers.
+    volatile double LDensity =
+       (double)LSize.roundToDouble() /
                             (LEnd - First + 1ULL).roundToDouble();
-    double RDensity = (double)RSize.roundToDouble() /
+    volatile double RDensity =
+      (double)RSize.roundToDouble() /
                             (Last - RBegin + 1ULL).roundToDouble();
      double Metric = Range.logBase2()*(LDensity+RDensity);
      // Should always split in some non-trivial place
@@ -2248,12 +2365,17 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
                                         const SwitchInst& SI) {
    size_t numCmps = 0;
  
+  BranchProbabilityInfo *BPI = FuncInfo.BPI;
    // Start with "simple" cases
    for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
-    MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
+    BasicBlock *SuccBB = SI.getSuccessor(i);
+    MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
+
+    uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0;
+
      Cases.push_back(Case(SI.getSuccessorValue(i),
                           SI.getSuccessorValue(i),
-                         SMBB));
+                         SMBB, ExtraWeight));
    }
    std::sort(Cases.begin(), Cases.end(), CaseCmp());
  
@@ -2261,7 +2383,8 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
    if (Cases.size() >= 2)
      // Must recompute end() each iteration because it may be
      // invalidated by erase if we hold on to it
-    for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) {
+    for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin());
+         J != Cases.end(); ) {
        const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
        const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
        MachineBasicBlock* nextBB = J->BB;
@@ -2272,6 +2395,16 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
        if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
          I->High = J->High;
          J = Cases.erase(J);
+
+        if (BranchProbabilityInfo *BPI = FuncInfo.BPI) {
+          uint32_t CurWeight = currentBB->getBasicBlock() ?
+            BPI->getEdgeWeight(SI.getParent(), currentBB->getBasicBlock()) : 16;
+          uint32_t NextWeight = nextBB->getBasicBlock() ?
+            BPI->getEdgeWeight(SI.getParent(), nextBB->getBasicBlock()) : 16;
+
+          BPI->setEdgeWeight(SI.getParent(), currentBB->getBasicBlock(),
+                             CurWeight + NextWeight);
+        }
        } else {
          I = J++;
        }
@@ -2375,8 +2508,10 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
      succs.push_back(I.getSuccessor(i));
    array_pod_sort(succs.begin(), succs.end());
    succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
-  for (unsigned i = 0, e = succs.size(); i != e; ++i)
-    IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]);
+  for (unsigned i = 0, e = succs.size(); i != e; ++i) {
+    MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]];
+    addSuccessorWithWeight(IndirectBrMBB, Succ);
+  }
  
    DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
                            MVT::Other, getControlRoot(),
@@ -2385,31 +2520,15 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
  
  void SelectionDAGBuilder::visitFSub(const User &I) {
    // -0.0 - X --> fneg
-  const Type *Ty = I.getType();
-  if (Ty->isVectorTy()) {
-    if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
-      const VectorType *DestTy = cast<VectorType>(I.getType());
-      const Type *ElTy = DestTy->getElementType();
-      unsigned VL = DestTy->getNumElements();
-      std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy));
-      Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
-      if (CV == CNZ) {
-        SDValue Op2 = getValue(I.getOperand(1));
-        setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
-                                 Op2.getValueType(), Op2));
-        return;
-      }
-    }
+  Type *Ty = I.getType();
+  if (isa<Constant>(I.getOperand(0)) &&
+      I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
+    SDValue Op2 = getValue(I.getOperand(1));
+    setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+                             Op2.getValueType(), Op2));
+    return;
    }
  
-  if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
-    if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) {
-      SDValue Op2 = getValue(I.getOperand(1));
-      setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
-                               Op2.getValueType(), Op2));
-      return;
-    }
-
    visitBinary(I, ISD::FSUB);
  }
  
@@ -2423,37 +2542,51 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
  void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
    SDValue Op1 = getValue(I.getOperand(0));
    SDValue Op2 = getValue(I.getOperand(1));
-  if (!I.getType()->isVectorTy() &&
-      Op2.getValueType() != TLI.getShiftAmountTy()) {
+
+  MVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType());
+
+  // Coerce the shift amount to the right type if we can.
+  if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
+    unsigned ShiftSize = ShiftTy.getSizeInBits();
+    unsigned Op2Size = Op2.getValueType().getSizeInBits();
+    DebugLoc DL = getCurDebugLoc();
+
      // If the operand is smaller than the shift count type, promote it.
-    EVT PTy = TLI.getPointerTy();
-    EVT STy = TLI.getShiftAmountTy();
-    if (STy.bitsGT(Op2.getValueType()))
-      Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
-                        TLI.getShiftAmountTy(), Op2);
+    if (ShiftSize > Op2Size)
+      Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
+
      // If the operand is larger than the shift count type but the shift
      // count type has enough bits to represent any shift value, truncate
      // it now. This is a common case and it exposes the truncate to
      // optimization early.
-    else if (STy.getSizeInBits() >=
-             Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
-      Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
-                        TLI.getShiftAmountTy(), Op2);
-    // Otherwise we'll need to temporarily settle for some other
-    // convenient type; type legalization will make adjustments as
-    // needed.
-    else if (PTy.bitsLT(Op2.getValueType()))
-      Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
-                        TLI.getPointerTy(), Op2);
-    else if (PTy.bitsGT(Op2.getValueType()))
-      Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
-                        TLI.getPointerTy(), Op2);
+    else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
+      Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
+    // Otherwise we'll need to temporarily settle for some other convenient
+    // type.  Type legalization will make adjustments once the shiftee is split.
+    else
+      Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
    }
  
    setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
                             Op1.getValueType(), Op1, Op2));
  }
  
+void SelectionDAGBuilder::visitSDiv(const User &I) {
+  SDValue Op1 = getValue(I.getOperand(0));
+  SDValue Op2 = getValue(I.getOperand(1));
+
+  // Turn exact SDivs into multiplications.
+  // FIXME: This should be in DAGCombiner, but it doesn't have access to the
+  // exact bit.
+  if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() &&
+      !isa<ConstantSDNode>(Op1) &&
+      isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue())
+    setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurDebugLoc(), DAG));
+  else
+    setValue(&I, DAG.getNode(ISD::SDIV, getCurDebugLoc(), Op1.getValueType(),
+                             Op1, Op2));
+}
+
  void SelectionDAGBuilder::visitICmp(const User &I) {
    ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
    if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
@@ -2491,10 +2624,12 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
    SDValue Cond     = getValue(I.getOperand(0));
    SDValue TrueVal  = getValue(I.getOperand(1));
    SDValue FalseVal = getValue(I.getOperand(2));
+  ISD::NodeType OpCode = Cond.getValueType().isVector() ?
+    ISD::VSELECT : ISD::SELECT;
  
    for (unsigned i = 0; i != NumValues; ++i)
-    Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(),
-                          TrueVal.getNode()->getValueType(TrueVal.getResNo()+i),
+    Values[i] = DAG.getNode(OpCode, getCurDebugLoc(),
+                            TrueVal.getNode()->getValueType(TrueVal.getResNo()+i),
                              Cond,
                              SDValue(TrueVal.getNode(),
                                      TrueVal.getResNo() + i),
@@ -2707,7 +2842,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
      // Analyze the access pattern of the vector to see if we can extract
      // two subvectors and do the shuffle. The analysis is done by calculating
      // the range of elements the mask access on both vectors.
-    int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
+    int MinRange[2] = { static_cast<int>(SrcNumElts+1),
+                        static_cast<int>(SrcNumElts+1)};
      int MaxRange[2] = {-1, -1};
  
      for (unsigned i = 0; i != MaskNumElts; ++i) {
@@ -2815,12 +2951,12 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
  void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
    const Value *Op0 = I.getOperand(0);
    const Value *Op1 = I.getOperand(1);
-  const Type *AggTy = I.getType();
-  const Type *ValTy = Op1->getType();
+  Type *AggTy = I.getType();
+  Type *ValTy = Op1->getType();
    bool IntoUndef = isa<UndefValue>(Op0);
    bool FromUndef = isa<UndefValue>(Op1);
  
-  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end());
+  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
  
    SmallVector<EVT, 4> AggValueVTs;
    ComputeValueVTs(TLI, AggTy, AggValueVTs);
@@ -2832,16 +2968,18 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
    SmallVector<SDValue, 4> Values(NumAggValues);
  
    SDValue Agg = getValue(Op0);
-  SDValue Val = getValue(Op1);
    unsigned i = 0;
    // Copy the beginning value(s) from the original aggregate.
    for (; i != LinearIndex; ++i)
      Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
                  SDValue(Agg.getNode(), Agg.getResNo() + i);
    // Copy values from the inserted value(s).
-  for (; i != LinearIndex + NumValValues; ++i)
-    Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
-                SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+  if (NumValValues) {
+    SDValue Val = getValue(Op1);
+    for (; i != LinearIndex + NumValValues; ++i)
+      Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+                  SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+  }
    // Copy remaining value(s) from the original aggregate.
    for (; i != NumAggValues; ++i)
      Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
@@ -2854,16 +2992,23 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
  
  void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
    const Value *Op0 = I.getOperand(0);
-  const Type *AggTy = Op0->getType();
-  const Type *ValTy = I.getType();
+  Type *AggTy = Op0->getType();
+  Type *ValTy = I.getType();
    bool OutOfUndef = isa<UndefValue>(Op0);
  
-  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end());
+  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
  
    SmallVector<EVT, 4> ValValueVTs;
    ComputeValueVTs(TLI, ValTy, ValValueVTs);
  
    unsigned NumValValues = ValValueVTs.size();
+
+  // Ignore an extractvalue that produces an empty object
+  if (!NumValValues) {
+    setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
+    return;
+  }
+
    SmallVector<SDValue, 4> Values(NumValValues);
  
    SDValue Agg = getValue(Op0);
@@ -2881,12 +3026,12 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
  
  void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
    SDValue N = getValue(I.getOperand(0));
-  const Type *Ty = I.getOperand(0)->getType();
+  Type *Ty = I.getOperand(0)->getType();
  
    for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
         OI != E; ++OI) {
      const Value *Idx = *OI;
-    if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+    if (StructType *StTy = dyn_cast<StructType>(Ty)) {
        unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
        if (Field) {
          // N = N + Offset
@@ -2957,7 +3102,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
    if (FuncInfo.StaticAllocaMap.count(&I))
      return;   // getValue will auto-populate this.
  
-  const Type *Ty = I.getAllocatedType();
+  Type *Ty = I.getAllocatedType();
    uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
    unsigned Align =
      std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
@@ -3004,10 +3149,13 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
  }
  
  void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
+  if (I.isAtomic())
+    return visitAtomicLoad(I);
+
    const Value *SV = I.getOperand(0);
    SDValue Ptr = getValue(SV);
  
-  const Type *Ty = I.getType();
+  Type *Ty = I.getType();
  
    bool isVolatile = I.isVolatile();
    bool isNonTemporal = I.getMetadata("nontemporal") != 0;
@@ -3081,6 +3229,9 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
  }
  
  void SelectionDAGBuilder::visitStore(const StoreInst &I) {
+  if (I.isAtomic())
+    return visitAtomicStore(I);
+
    const Value *SrcV = I.getOperand(0);
    const Value *PtrV = I.getOperand(1);
  
@@ -3131,6 +3282,179 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
    DAG.setRoot(StoreNode);
  }
  
+/// InsertFenceForAtomic - Append to Chain the fence needed before (Before) or
+/// after an atomic op of ordering Order; returns Chain if none is needed.
+static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
+                                    SynchronizationScope Scope, bool Before,
+                                    DebugLoc dl, SelectionDAG &DAG,
+                                    const TargetLowering &TLI) {
+  if (Before) {  // Leading fence: acq_rel/seq_cst only need a release fence.
+    if (Order == AcquireRelease || Order == SequentiallyConsistent)
+      Order = Release;
+    else if (Order == Acquire || Order == Monotonic || Order == Unordered)
+      return Chain;
+  } else {       // Trailing fence: acq_rel only needs an acquire fence.
+    if (Order == AcquireRelease)
+      Order = Acquire;
+    else if (Order == Release || Order == Monotonic || Order == Unordered)
+      return Chain;
+  }
+  SDValue Ops[3];
+  Ops[0] = Chain;
+  Ops[1] = DAG.getConstant(Order, TLI.getPointerTy());
+  Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy());
+  return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3);
+}
+
+void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
+  const DebugLoc DL = getCurDebugLoc();
+  const AtomicOrdering Ordering = I.getOrdering();
+  const SynchronizationScope SynchScope = I.getSynchScope();
+  // When the target wants explicit fences, the operation itself is emitted as
+  // monotonic and the requested ordering is provided by surrounding fences.
+  const bool UseFences = TLI.getInsertFencesForAtomic();
+
+  SDValue Chain = getRoot();
+  if (UseFences)
+    Chain = InsertFenceForAtomic(Chain, Ordering, SynchScope, /*Before=*/true,
+                                 DL, DAG, TLI);
+
+  SDValue Ptr = getValue(I.getPointerOperand());
+  SDValue Cmp = getValue(I.getCompareOperand());
+  SDValue NewVal = getValue(I.getNewValOperand());
+  SDValue Swap =
+    DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, DL, Cmp.getValueType().getSimpleVT(),
+                  Chain, Ptr, Cmp, NewVal,
+                  MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */,
+                  UseFences ? Monotonic : Ordering, SynchScope);
+
+  // Value #1 of the node is its output chain.
+  Chain = Swap.getValue(1);
+  if (UseFences)
+    Chain = InsertFenceForAtomic(Chain, Ordering, SynchScope, /*Before=*/false,
+                                 DL, DAG, TLI);
+
+  setValue(&I, Swap);
+  DAG.setRoot(Chain);
+}
+
+void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
+  // Map the IR read-modify-write operation onto its SelectionDAG opcode.
+  ISD::NodeType Opcode;
+  switch (I.getOperation()) {
+  default: llvm_unreachable("Unknown atomicrmw operation"); return;
+  case AtomicRMWInst::Xchg: Opcode = ISD::ATOMIC_SWAP; break;
+  case AtomicRMWInst::Add:  Opcode = ISD::ATOMIC_LOAD_ADD; break;
+  case AtomicRMWInst::Sub:  Opcode = ISD::ATOMIC_LOAD_SUB; break;
+  case AtomicRMWInst::And:  Opcode = ISD::ATOMIC_LOAD_AND; break;
+  case AtomicRMWInst::Nand: Opcode = ISD::ATOMIC_LOAD_NAND; break;
+  case AtomicRMWInst::Or:   Opcode = ISD::ATOMIC_LOAD_OR; break;
+  case AtomicRMWInst::Xor:  Opcode = ISD::ATOMIC_LOAD_XOR; break;
+  case AtomicRMWInst::Max:  Opcode = ISD::ATOMIC_LOAD_MAX; break;
+  case AtomicRMWInst::Min:  Opcode = ISD::ATOMIC_LOAD_MIN; break;
+  case AtomicRMWInst::UMax: Opcode = ISD::ATOMIC_LOAD_UMAX; break;
+  case AtomicRMWInst::UMin: Opcode = ISD::ATOMIC_LOAD_UMIN; break;
+  }
+
+  const DebugLoc DL = getCurDebugLoc();
+  const AtomicOrdering Ordering = I.getOrdering();
+  const SynchronizationScope SynchScope = I.getSynchScope();
+  // With explicit fences the node is monotonic; fences supply the ordering.
+  const bool UseFences = TLI.getInsertFencesForAtomic();
+
+  SDValue Chain = getRoot();
+  if (UseFences)
+    Chain = InsertFenceForAtomic(Chain, Ordering, SynchScope, /*Before=*/true,
+                                 DL, DAG, TLI);
+
+  SDValue Ptr = getValue(I.getPointerOperand());
+  SDValue Val = getValue(I.getValOperand());
+  SDValue RMW =
+    DAG.getAtomic(Opcode, DL, Val.getValueType().getSimpleVT(),
+                  Chain, Ptr, Val, I.getPointerOperand(), 0 /* Alignment */,
+                  UseFences ? Monotonic : Ordering, SynchScope);
+
+  // Value #1 of the node is its output chain.
+  Chain = RMW.getValue(1);
+  if (UseFences)
+    Chain = InsertFenceForAtomic(Chain, Ordering, SynchScope, /*Before=*/false,
+                                 DL, DAG, TLI);
+
+  setValue(&I, RMW);
+  DAG.setRoot(Chain);
+}
+
+void SelectionDAGBuilder::visitFence(const FenceInst &I) {
+  SDValue FenceOps[3];  // Chain, ordering and synchronization scope.
+  FenceOps[0] = getRoot();
+  FenceOps[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy());
+  FenceOps[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy());
+  DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, getCurDebugLoc(), MVT::Other,
+                          FenceOps, 3));
+}
+
+void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
+  const DebugLoc DL = getCurDebugLoc();
+  const AtomicOrdering Ordering = I.getOrdering();
+  const SynchronizationScope SynchScope = I.getSynchScope();
+  const bool UseFences = TLI.getInsertFencesForAtomic();
+
+  SDValue Chain = getRoot();
+  // Reject loads whose alignment (in bytes) is smaller than the loaded type.
+  EVT MemVT = EVT::getEVT(I.getType());
+  if (I.getAlignment() * 8 < MemVT.getSizeInBits())
+    report_fatal_error("Cannot generate unaligned atomic load");
+
+  // With explicit fences the load itself is monotonic; only a trailing fence
+  // is inserted for it.
+  SDValue Ptr = getValue(I.getPointerOperand());
+  SDValue Load =
+    DAG.getAtomic(ISD::ATOMIC_LOAD, DL, MemVT, MemVT, Chain, Ptr,
+                  I.getPointerOperand(), I.getAlignment(),
+                  UseFences ? Monotonic : Ordering, SynchScope);
+
+  Chain = Load.getValue(1);
+  if (UseFences)
+    Chain = InsertFenceForAtomic(Chain, Ordering, SynchScope, /*Before=*/false,
+                                 DL, DAG, TLI);
+
+  setValue(&I, Load);
+  DAG.setRoot(Chain);
+}
+
+void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
+  const DebugLoc DL = getCurDebugLoc();
+  const AtomicOrdering Ordering = I.getOrdering();
+  const SynchronizationScope SynchScope = I.getSynchScope();
+  const bool UseFences = TLI.getInsertFencesForAtomic();
+
+  SDValue Chain = getRoot();
+
+  // Reject stores whose alignment (in bytes) is smaller than the stored type.
+  EVT MemVT = EVT::getEVT(I.getValueOperand()->getType());
+  if (I.getAlignment() * 8 < MemVT.getSizeInBits())
+    report_fatal_error("Cannot generate unaligned atomic store");
+
+  // With explicit fences the store itself is monotonic and the fences placed
+  // around it supply the requested ordering.
+  if (UseFences)
+    Chain = InsertFenceForAtomic(Chain, Ordering, SynchScope, /*Before=*/true,
+                                 DL, DAG, TLI);
+
+  SDValue Ptr = getValue(I.getPointerOperand());
+  SDValue Val = getValue(I.getValueOperand());
+  // The node returned for the store is used directly as the output chain.
+  Chain = DAG.getAtomic(ISD::ATOMIC_STORE, DL, MemVT, Chain, Ptr, Val,
+                        I.getPointerOperand(), I.getAlignment(),
+                        UseFences ? Monotonic : Ordering, SynchScope);
+
+  if (UseFences)
+    Chain = InsertFenceForAtomic(Chain, Ordering, SynchScope, /*Before=*/false,
+                                 DL, DAG, TLI);
+
+  DAG.setRoot(Chain);
+}
+
  /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
  /// node.
  void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
@@ -3210,7 +3534,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
    }
  
    if (!I.getType()->isVoidTy()) {
-    if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
+    if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
        EVT VT = TLI.getValueType(PTy);
        Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result);
      }
@@ -3270,7 +3594,8 @@ SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I,
                    Root,
                    getValue(I.getArgOperand(0)),
                    getValue(I.getArgOperand(1)),
-                  I.getArgOperand(0));
+                  I.getArgOperand(0), 0 /* Alignment */,
+                  Monotonic, CrossThread);
    setValue(&I, L);
    DAG.setRoot(L.getValue(1));
    return 0;
@@ -4035,6 +4360,24 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
    return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
  }
  
+// getTruncatedArgReg - Find the underlying register used for a truncated
+// argument, or return 0 if N does not match the expected node pattern.
+static unsigned getTruncatedArgReg(const SDValue &N) {
+  if (N.getOpcode() != ISD::TRUNCATE)
+    return 0;
+
+  // Look through TRUNCATE(AssertZext/AssertSext(X)).
+  const SDValue &Inner = N.getOperand(0);
+  const unsigned InnerOpc = Inner.getOpcode();
+  if (InnerOpc != ISD::AssertZext && InnerOpc != ISD::AssertSext)
+    return 0;
+  const SDValue &Src = Inner.getOperand(0);
+  if (Src.getOpcode() == ISD::CopyFromReg)
+    return cast<RegisterSDNode>(Src.getOperand(1))->getReg();
+  // A nested truncate is resolved recursively; anything else is unknown.
+  return Src.getOpcode() == ISD::TRUNCATE ? getTruncatedArgReg(Src) : 0;
+}
+
  /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
  /// argument, create the corresponding DBG_VALUE machine instruction for it now.
  /// At the end of instruction selection, they will be inserted to the entry BB.
@@ -4055,24 +4398,18 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
    if (DV.isInlinedFnArgument(MF.getFunction()))
      return false;
  
-  MachineBasicBlock *MBB = FuncInfo.MBB;
-  if (MBB != &MF.front())
-    return false;
-
    unsigned Reg = 0;
-  if (Arg->hasByValAttr()) {
-    // Byval arguments' frame index is recorded during argument lowering.
-    // Use this info directly.
-    Reg = TRI->getFrameRegister(MF);
-    Offset = FuncInfo.getByValArgumentFrameIndex(Arg);
-    // If byval argument ofset is not recorded then ignore this.
-    if (!Offset)
-      Reg = 0;
-  }
-
-  if (N.getNode() && N.getOpcode() == ISD::CopyFromReg) {
-    Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
-    if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+  // Some arguments' frame index is recorded during argument lowering.
+  Offset = FuncInfo.getArgumentFrameIndex(Arg);
+  if (Offset)
+      Reg = TRI->getFrameRegister(MF);
+
+  if (!Reg && N.getNode()) {
+    if (N.getOpcode() == ISD::CopyFromReg)
+      Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
+    else
+      Reg = getTruncatedArgReg(N);
+    if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned PR = RegInfo.getLiveInPhysReg(Reg);
        if (PR)
@@ -4234,9 +4571,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
            SDV = DAG.getDbgValue(Variable, FINode->getIndex(),
                                  0, dl, SDNodeOrder);
          else {
-          // Can't do anything with other non-AI cases yet.  This might be a
-          // parameter of a callee function that got inlined, for example.
-          DEBUG(dbgs() << "Dropping debug info for " << DI);
+          // Address is an argument, so try to emit its dbg value using
+          // virtual register info from the FuncInfo.ValueMap.
+          EmitFuncArgumentDbgValue(Address, Variable, 0, N);
            return 0;
          }
        } else if (AI)
@@ -4288,7 +4625,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      // debug info exists.
      ++SDNodeOrder;
      SDDbgValue *SDV;
-    if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) {
+    if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) {
        SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
        DAG.AddDbgValue(SDV, 0, false);
      } else {
@@ -4304,7 +4641,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                                  N.getResNo(), Offset, dl, SDNodeOrder);
            DAG.AddDbgValue(SDV, N.getNode(), false);
          }
-      } else if (isa<PHINode>(V) && !V->use_empty() ) {
+      } else if (!V->use_empty() ) {
          // Do not call getValue(V) yet, as we don't want to generate code.
          // Remember it for later.
          DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
@@ -4417,6 +4754,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      MMI.setCurrentCallSite(CI->getZExtValue());
      return 0;
    }
+  case Intrinsic::eh_sjlj_functioncontext: {
+    // Get and store the index of the function context.
+    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+    AllocaInst *FnCtx = cast<AllocaInst>(I.getArgOperand(0));
+    int FI = FuncInfo.StaticAllocaMap[FnCtx];
+    MFI->setFunctionContextIndex(FI);
+    return 0;
+  }
    case Intrinsic::eh_sjlj_setjmp: {
      setValue(&I, DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, MVT::i32, getRoot(),
                               getValue(I.getArgOperand(0))));
@@ -4562,6 +4907,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
    case Intrinsic::pow:
      visitPow(I);
      return 0;
+  case Intrinsic::fma:
+    setValue(&I, DAG.getNode(ISD::FMA, dl,
+                             getValue(I.getArgOperand(0)).getValueType(),
+                             getValue(I.getArgOperand(0)),
+                             getValue(I.getArgOperand(1)),
+                             getValue(I.getArgOperand(2))));
+    return 0;
    case Intrinsic::convert_to_fp16:
      setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
                               MVT::i16, getValue(I.getArgOperand(0))));
@@ -4674,12 +5026,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
      Ops[5] = DAG.getSrcValue(F);
  
-    Res = DAG.getNode(ISD::TRAMPOLINE, dl,
-                      DAG.getVTList(TLI.getPointerTy(), MVT::Other),
-                      Ops, 6);
+    Res = DAG.getNode(ISD::INIT_TRAMPOLINE, dl, MVT::Other, Ops, 6);
  
-    setValue(&I, Res);
-    DAG.setRoot(Res.getValue(1));
+    DAG.setRoot(Res);
+    return 0;
+  }
+  case Intrinsic::adjust_trampoline: {
+    setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, dl,
+                             TLI.getPointerTy(),
+                             getValue(I.getArgOperand(0))));
      return 0;
    }
    case Intrinsic::gcroot:
@@ -4698,9 +5053,29 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
    case Intrinsic::flt_rounds:
      setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
      return 0;
-  case Intrinsic::trap:
-    DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
+
+  case Intrinsic::expect: {
+    // Just replace __builtin_expect(exp, c) with EXP.
+    setValue(&I, getValue(I.getArgOperand(0)));
      return 0;
+  }
+
+  case Intrinsic::trap: {
+    StringRef TrapFuncName = getTrapFunctionName();
+    if (TrapFuncName.empty()) {
+      DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
+      return 0;
+    }
+    TargetLowering::ArgListTy Args;
+    std::pair<SDValue, SDValue> Result =
+      TLI.LowerCallTo(getRoot(), I.getType(),
+                 false, false, false, false, 0, CallingConv::C,
+                 /*isTailCall=*/false, /*isReturnValueUsed=*/true,
+                 DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
+                 Args, DAG, getCurDebugLoc());
+    DAG.setRoot(Result.second);
+    return 0;
+  }
    case Intrinsic::uadd_with_overflow:
      return implVisitAluOverflow(I, ISD::UADDO);
    case Intrinsic::sadd_with_overflow:
@@ -4715,15 +5090,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      return implVisitAluOverflow(I, ISD::SMULO);
  
    case Intrinsic::prefetch: {
-    SDValue Ops[4];
+    SDValue Ops[5];
      unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
      Ops[0] = getRoot();
      Ops[1] = getValue(I.getArgOperand(0));
      Ops[2] = getValue(I.getArgOperand(1));
      Ops[3] = getValue(I.getArgOperand(2));
+    Ops[4] = getValue(I.getArgOperand(3));
      DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl,
                                          DAG.getVTList(MVT::Other),
-                                        &Ops[0], 4,
+                                        &Ops[0], 5,
                                          EVT::getIntegerVT(*Context, 8),
                                          MachinePointerInfo(I.getArgOperand(0)),
                                          0, /* align */
@@ -4750,7 +5126,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                      getValue(I.getArgOperand(0)),
                      getValue(I.getArgOperand(1)),
                      getValue(I.getArgOperand(2)),
-                    MachinePointerInfo(I.getArgOperand(0)));
+                    MachinePointerInfo(I.getArgOperand(0)), 0 /* Alignment */,
+                    Monotonic, CrossThread);
      setValue(&I, L);
      DAG.setRoot(L.getValue(1));
      return 0;
@@ -4793,9 +5170,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
  void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
                                        bool isTailCall,
                                        MachineBasicBlock *LandingPad) {
-  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
-  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
-  const Type *RetTy = FTy->getReturnType();
+  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+  Type *RetTy = FTy->getReturnType();
    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
    MCSymbol *BeginLabel = 0;
  
@@ -4810,7 +5187,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
                  Outs, TLI, &Offsets);
  
    bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
-                        FTy->isVarArg(), Outs, FTy->getContext());
+                                          DAG.getMachineFunction(),
+                                          FTy->isVarArg(), Outs,
+                                          FTy->getContext());
  
    SDValue DemoteStackSlot;
    int DemoteStackIdx = -100;
@@ -4822,7 +5201,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
                        FTy->getReturnType());
      MachineFunction &MF = DAG.getMachineFunction();
      DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
-    const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
+    Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
  
      DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy());
      Entry.Node = DemoteStackSlot;
@@ -4840,8 +5219,14 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
  
    for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
         i != e; ++i) {
-    SDValue ArgNode = getValue(*i);
-    Entry.Node = ArgNode; Entry.Ty = (*i)->getType();
+    const Value *V = *i;
+
+    // Skip empty types
+    if (V->getType()->isEmptyTy())
+      continue;
+
+    SDValue ArgNode = getValue(V);
+    Entry.Node = ArgNode; Entry.Ty = V->getType();
  
      unsigned attrInd = i - CS.arg_begin() + 1;
      Entry.isSExt  = CS.paramHasAttr(attrInd, Attribute::SExt);
@@ -4904,7 +5289,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
      // The instruction result is the result of loading from the
      // hidden sret parameter.
      SmallVector<EVT, 1> PVTs;
-    const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
+    Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
  
      ComputeValueVTs(TLI, PtrRetTy, PVTs);
      assert(PVTs.size() == 1 && "Pointers should fit in one register");
@@ -4953,15 +5338,21 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
               DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
                           DAG.getVTList(&RetTys[0], RetTys.size()),
                           &ReturnValues[0], ReturnValues.size()));
-
    }
  
-  // As a special case, a null chain means that a tail call has been emitted and
-  // the DAG root is already updated.
-  if (Result.second.getNode())
-    DAG.setRoot(Result.second);
-  else
+  // Assign order to nodes here. If the call does not produce a result, it won't
+  // be mapped to a SDNode and visit() will not assign it an order number.
+  if (!Result.second.getNode()) {
+    // As a special case, a null chain means that a tail call has been emitted and
+    // the DAG root is already updated.
      HasTailCall = true;
+    ++SDNodeOrder;
+    AssignOrderingToNode(DAG.getRoot().getNode());
+  } else {
+    DAG.setRoot(Result.second);
+    ++SDNodeOrder;
+    AssignOrderingToNode(Result.second.getNode());
+  }
  
    if (LandingPad) {
      // Insert a label at the end of the invoke call to mark the try range.  This
@@ -4991,7 +5382,7 @@ static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
  }
  
  static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
-                             const Type *LoadTy,
+                             Type *LoadTy,
                               SelectionDAGBuilder &Builder) {
  
    // Check to see if this load can be trivially constant folded, e.g. if the
@@ -5054,7 +5445,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
    if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) {
      bool ActuallyDoIt = true;
      MVT LoadVT;
-    const Type *LoadTy;
+    Type *LoadTy;
      switch (Size->getZExtValue()) {
      default:
        LoadVT = MVT::Other;
@@ -5122,14 +5513,14 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
  
    // See if any floating point values are being passed to this function. This is
    // used to emit an undefined reference to fltused on Windows.
-  const FunctionType *FT =
+  FunctionType *FT =
      cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0));
    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
    if (FT->isVarArg() &&
        !MMI.callsExternalVAFunctionWithFloatingPointArguments()) {
      for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
-      const Type* T = I.getArgOperand(i)->getType();
-      for (po_iterator<const Type*> i = po_begin(T), e = po_end(T);
+      Type* T = I.getArgOperand(i)->getType();
+      for (po_iterator<Type*> i = po_begin(T), e = po_end(T);
             i != e; ++i) {
          if (!i->isFloatingPointTy()) continue;
          MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true);
@@ -5227,12 +5618,11 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
    LowerCallTo(&I, Callee, I.isTailCall());
  }
  
-namespace llvm {
+namespace {
  
  /// AsmOperandInfo - This contains information for each constraint that we are
  /// lowering.
-class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo :
-    public TargetLowering::AsmOperandInfo {
+class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
  public:
    /// CallOperand - If this is the result output operand or a clobber
    /// this is null, otherwise it is the incoming operand to the CallInst.
@@ -5274,17 +5664,23 @@ public:
      if (isa<BasicBlock>(CallOperandVal))
        return TLI.getPointerTy();
  
-    const llvm::Type *OpTy = CallOperandVal->getType();
+    llvm::Type *OpTy = CallOperandVal->getType();
  
+    // FIXME: code duplicated from TargetLowering::ParseConstraints().
      // If this is an indirect operand, the operand is a pointer to the
      // accessed type.
      if (isIndirect) {
-      const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+      llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
        if (!PtrTy)
          report_fatal_error("Indirect operand for inline asm not a pointer!");
        OpTy = PtrTy->getElementType();
      }
  
+    // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
+    if (StructType *STy = dyn_cast<StructType>(OpTy))
+      if (STy->getNumElements() == 1)
+        OpTy = STy->getElementType(0);
+
      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
@@ -5320,55 +5716,7 @@ private:
  
  typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
  
-} // end llvm namespace.
-
-/// isAllocatableRegister - If the specified register is safe to allocate,
-/// i.e. it isn't a stack pointer or some other special register, return the
-/// register class for the register.  Otherwise, return null.
-static const TargetRegisterClass *
-isAllocatableRegister(unsigned Reg, MachineFunction &MF,
-                      const TargetLowering &TLI,
-                      const TargetRegisterInfo *TRI) {
-  EVT FoundVT = MVT::Other;
-  const TargetRegisterClass *FoundRC = 0;
-  for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
-       E = TRI->regclass_end(); RCI != E; ++RCI) {
-    EVT ThisVT = MVT::Other;
-
-    const TargetRegisterClass *RC = *RCI;
-    // If none of the value types for this register class are valid, we
-    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
-    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
-         I != E; ++I) {
-      if (TLI.isTypeLegal(*I)) {
-        // If we have already found this register in a different register class,
-        // choose the one with the largest VT specified.  For example, on
-        // PowerPC, we favor f64 register classes over f32.
-        if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) {
-          ThisVT = *I;
-          break;
-        }
-      }
-    }
-
-    if (ThisVT == MVT::Other) continue;
-
-    // NOTE: This isn't ideal.  In particular, this might allocate the
-    // frame pointer in functions that need it (due to them not being taken
-    // out of allocation, because a variable sized allocation hasn't been seen
-    // yet).  This is a slight code pessimization, but should still work.
-    for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
-         E = RC->allocation_order_end(MF); I != E; ++I)
-      if (*I == Reg) {
-        // We found a matching register class.  Keep looking at others in case
-        // we find one with larger registers that this physreg is also in.
-        FoundRC = RC;
-        FoundVT = ThisVT;
-        break;
-      }
-  }
-  return FoundRC;
-}
+} // end anonymous namespace
  
  /// GetRegistersForValue - Assign registers (virtual or physical) for the
  /// specified operand.  We prefer to assign virtual registers, to allow the
@@ -5379,11 +5727,13 @@ isAllocatableRegister(unsigned Reg, MachineFunction &MF,
  ///   OpInfo describes the operand.
  ///   Input and OutputRegs are the set of already allocated physical registers.
  ///
-void SelectionDAGBuilder::
-GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
-                     std::set<unsigned> &OutputRegs,
-                     std::set<unsigned> &InputRegs) {
-  LLVMContext &Context = FuncInfo.Fn->getContext();
+static void GetRegistersForValue(SelectionDAG &DAG,
+                                 const TargetLowering &TLI,
+                                 DebugLoc DL,
+                                 SDISelAsmOperandInfo &OpInfo,
+                                 std::set<unsigned> &OutputRegs,
+                                 std::set<unsigned> &InputRegs) {
+  LLVMContext &Context = *DAG.getContext();
  
    // Compute whether this value requires an input register, an output register,
    // or both.
@@ -5429,7 +5779,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
        // vector types).
        EVT RegVT = *PhysReg.second->vt_begin();
        if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
-        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
+        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
                                           RegVT, OpInfo.CallOperand);
          OpInfo.ConstraintVT = RegVT;
        } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
@@ -5439,7 +5789,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
          // machine.
          RegVT = EVT::getIntegerVT(Context,
                                    OpInfo.ConstraintVT.getSizeInBits());
-        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
+        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
                                           RegVT, OpInfo.CallOperand);
          OpInfo.ConstraintVT = RegVT;
        }
@@ -5502,52 +5852,6 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
      return;
    }
  
-  // This is a reference to a register class that doesn't directly correspond
-  // to an LLVM register class.  Allocate NumRegs consecutive, available,
-  // registers from the class.
-  std::vector<unsigned> RegClassRegs
-    = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode,
-                                            OpInfo.ConstraintVT);
-
-  const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
-  unsigned NumAllocated = 0;
-  for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
-    unsigned Reg = RegClassRegs[i];
-    // See if this register is available.
-    if ((isOutReg && OutputRegs.count(Reg)) ||   // Already used.
-        (isInReg  && InputRegs.count(Reg))) {    // Already used.
-      // Make sure we find consecutive registers.
-      NumAllocated = 0;
-      continue;
-    }
-
-    // Check to see if this register is allocatable (i.e. don't give out the
-    // stack pointer).
-    const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI);
-    if (!RC) {        // Couldn't allocate this register.
-      // Reset NumAllocated to make sure we return consecutive registers.
-      NumAllocated = 0;
-      continue;
-    }
-
-    // Okay, this register is good, we can use it.
-    ++NumAllocated;
-
-    // If we allocated enough consecutive registers, succeed.
-    if (NumAllocated == NumRegs) {
-      unsigned RegStart = (i-NumAllocated)+1;
-      unsigned RegEnd   = i+1;
-      // Mark all of the allocated registers used.
-      for (unsigned i = RegStart; i != RegEnd; ++i)
-        Regs.push_back(RegClassRegs[i]);
-
-      OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(),
-                                         OpInfo.ConstraintVT);
-      OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
-      return;
-    }
-  }
-
    // Otherwise, we couldn't allocate enough registers for this.
  }
  
@@ -5561,7 +5865,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
  
    std::set<unsigned> OutputRegs, InputRegs;
  
-  TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(CS);
+  TargetLowering::AsmOperandInfoVector
+    TargetConstraints = TLI.ParseConstraints(CS);
+
    bool hasMemory = false;
  
    unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
@@ -5585,7 +5891,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
        // corresponding argument.
        assert(!CS.getType()->isVoidTy() &&
               "Bad inline asm!");
-      if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
+      if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
          OpVT = TLI.getValueType(STy->getElementType(ResNo));
        } else {
          assert(ResNo == 0 && "Asm only has one result!");
@@ -5620,7 +5926,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
        hasMemory = true;
      else {
        for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) {
-        TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[j]);
+        TargetLowering::ConstraintType
+          CType = TLI.getConstraintType(OpInfo.Codes[j]);
          if (CType == TargetLowering::C_Memory) {
            hasMemory = true;
            break;
@@ -5651,10 +5958,15 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
        SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
  
        if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+       std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+         TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+                                           OpInfo.ConstraintVT);
+       std::pair<unsigned, const TargetRegisterClass*> InputRC =
+         TLI.getRegForInlineAsmConstraint(Input.ConstraintCode,
+                                           Input.ConstraintVT);
          if ((OpInfo.ConstraintVT.isInteger() !=
               Input.ConstraintVT.isInteger()) ||
-            (OpInfo.ConstraintVT.getSizeInBits() !=
-             Input.ConstraintVT.getSizeInBits())) {
+            (MatchRC.second != InputRC.second)) {
            report_fatal_error("Unsupported asm: input constraint"
                               " with a matching output constraint of"
                               " incompatible type!");
@@ -5670,12 +5982,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
      // need to to provide an address for the memory input.
      if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
          !OpInfo.isIndirect) {
-      assert((OpInfo.isMultipleAlternative || (OpInfo.Type == InlineAsm::isInput)) &&
+      assert((OpInfo.isMultipleAlternative ||
+              (OpInfo.Type == InlineAsm::isInput)) &&
               "Can only indirectify direct input operands!");
  
        // Memory operands really want the address of the value.  If we don't have
        // an indirect input, put it in the constpool if we can, otherwise spill
        // it to a stack slot.
+      // TODO: This isn't quite right. We need to handle these according to
+      // the addressing mode that the constraint wants. Also, this may take
+      // an additional register for the computation and we don't want that
+      // either.
  
        // If the operand is a float, integer, or vector constant, spill to a
        // constant pool entry to get its address.
@@ -5687,7 +6004,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
        } else {
          // Otherwise, create a stack slot and emit a store to it before the
          // asm.
-        const Type *Ty = OpVal->getType();
+        Type *Ty = OpVal->getType();
          uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
          unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
          MachineFunction &MF = DAG.getMachineFunction();
@@ -5710,7 +6027,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
      // If this constraint is for a specific register, allocate it before
      // anything else.
      if (OpInfo.ConstraintType == TargetLowering::C_Register)
-      GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+      GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs,
+                           InputRegs);
    }
  
    // Second pass - Loop over all of the operands, assigning virtual or physregs
@@ -5721,7 +6039,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
      // C_Register operands have already been allocated, Other/Memory don't need
      // to be.
      if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
-      GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+      GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs,
+                           InputRegs);
    }
  
    // AsmNodeOperands - The operands for the ISD::INLINEASM node.
@@ -5875,7 +6194,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
  
        if (OpInfo.ConstraintType == TargetLowering::C_Other) {
          std::vector<SDValue> Ops;
-        TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
+        TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
                                           Ops, DAG);
          if (Ops.empty())
            report_fatal_error("Invalid operand for inline asm constraint '" +
@@ -5910,8 +6229,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
               "Don't know how to handle indirect register inputs yet!");
  
        // Copy the input into the appropriate registers.
-      if (OpInfo.AssignedRegs.Regs.empty() ||
-          !OpInfo.AssignedRegs.areValueTypesLegal(TLI))
+      if (OpInfo.AssignedRegs.Regs.empty())
          report_fatal_error("Couldn't allocate input reg for constraint '" +
                             Twine(OpInfo.ConstraintCode) + "'!");
  
@@ -5926,8 +6244,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
        // Add the clobbered value to the operand list, so that the register
        // allocator is aware that the physreg got clobbered.
        if (!OpInfo.AssignedRegs.Regs.empty())
-        OpInfo.AssignedRegs.AddInlineAsmOperands(
-                                            InlineAsm::Kind_RegDefEarlyClobber,
+        OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
                                                   false, 0, DAG,
                                                   AsmNodeOperands);
        break;
@@ -6048,7 +6365,7 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
  /// FIXME: When all targets are
  /// migrated to using LowerCall, this hook should be integrated into SDISel.
  std::pair<SDValue, SDValue>
-TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
+TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
                              bool RetSExt, bool RetZExt, bool isVarArg,
                              bool isInreg, unsigned NumFixedArgs,
                              CallingConv::ID CallConv, bool isTailCall,
@@ -6065,7 +6382,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
      for (unsigned Value = 0, NumValues = ValueVTs.size();
           Value != NumValues; ++Value) {
        EVT VT = ValueVTs[Value];
-      const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
+      Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
        SDValue Op = SDValue(Args[i].Node.getNode(),
                             Args[i].Node.getResNo() + Value);
        ISD::ArgFlagsTy Flags;
@@ -6082,16 +6399,17 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
          Flags.setSRet();
        if (Args[i].isByVal) {
          Flags.setByVal();
-        const PointerType *Ty = cast<PointerType>(Args[i].Ty);
-        const Type *ElementTy = Ty->getElementType();
-        unsigned FrameAlign = getByValTypeAlignment(ElementTy);
-        unsigned FrameSize  = getTargetData()->getTypeAllocSize(ElementTy);
+        PointerType *Ty = cast<PointerType>(Args[i].Ty);
+        Type *ElementTy = Ty->getElementType();
+        Flags.setByValSize(getTargetData()->getTypeAllocSize(ElementTy));
          // For ByVal, alignment should come from FE.  BE will guess if this
          // info is not there but there are cases it cannot get right.
+        unsigned FrameAlign;
          if (Args[i].Alignment)
            FrameAlign = Args[i].Alignment;
+        else
+          FrameAlign = getByValTypeAlignment(ElementTy);
          Flags.setByValAlign(FrameAlign);
-        Flags.setByValSize(FrameSize);
        }
        if (Args[i].isNest)
          Flags.setNest();
@@ -6197,7 +6515,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
  
    // For a function returning void, there is no return value. We can't create
    // such a node, so we just return a null return value in that case. In
-  // that case, nothing will actualy look at the value.
+  // that case, nothing will actually look at the value.
    if (ReturnValues.empty())
      return std::make_pair(SDValue(), Chain);
  
@@ -6236,6 +6554,25 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
  
  #include "llvm/CodeGen/SelectionDAGISel.h"
  
+/// isOnlyUsedInEntryBlock - Return true if every user of the argument is a
+/// non-switch instruction in the entry block.  Switch users yield false,
+/// since the switch may expand into multiple basic blocks.
+static bool isOnlyUsedInEntryBlock(const Argument *A) {
+  // With FastISel active, we may be splitting blocks, so only dead arguments
+  // qualify; this forces virtual registers for all non-dead arguments.
+  if (EnableFastISel)
+    return A->use_empty();
+
+  const BasicBlock *Entry = A->getParent()->begin();
+  for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end();
+       UI != E; ++UI) {
+    const User *U = *UI;
+    if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
+      return false;  // Use outside the entry block, or by a switch.
+  }
+  return true;
+}
+
  void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
    // If this is the entry block, emit arguments.
    const Function &F = *LLVMBB->getParent();
@@ -6273,7 +6610,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
      for (unsigned Value = 0, NumValues = ValueVTs.size();
           Value != NumValues; ++Value) {
        EVT VT = ValueVTs[Value];
-      const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
+      Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
        ISD::ArgFlagsTy Flags;
        unsigned OriginalAlignment =
          TD->getABITypeAlignment(ArgTy);
@@ -6288,16 +6625,17 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
          Flags.setSRet();
        if (F.paramHasAttr(Idx, Attribute::ByVal)) {
          Flags.setByVal();
-        const PointerType *Ty = cast<PointerType>(I->getType());
-        const Type *ElementTy = Ty->getElementType();
-        unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy);
-        unsigned FrameSize  = TD->getTypeAllocSize(ElementTy);
+        PointerType *Ty = cast<PointerType>(I->getType());
+        Type *ElementTy = Ty->getElementType();
+        Flags.setByValSize(TD->getTypeAllocSize(ElementTy));
          // For ByVal, alignment should be passed from FE.  BE will guess if
          // this info is not there but there are cases it cannot get right.
+        unsigned FrameAlign;
          if (F.getParamAlignment(Idx))
            FrameAlign = F.getParamAlignment(Idx);
+        else
+          FrameAlign = TLI.getByValTypeAlignment(ElementTy);
          Flags.setByValAlign(FrameAlign);
-        Flags.setByValSize(FrameSize);
        }
        if (F.paramHasAttr(Idx, Attribute::Nest))
          Flags.setNest();
@@ -6379,8 +6717,8 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
      if (I->use_empty() && NumValues)
        SDB->setUnusedArgValue(I, InVals[i]);
  
-    for (unsigned Value = 0; Value != NumValues; ++Value) {
-      EVT VT = ValueVTs[Value];
+    for (unsigned Val = 0; Val != NumValues; ++Val) {
+      EVT VT = ValueVTs[Val];
        EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
        unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT);
  
@@ -6399,21 +6737,42 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
        i += NumParts;
      }
  
-    // Note down frame index for byval arguments.
-    if (I->hasByValAttr() && !ArgValues.empty())
-      if (FrameIndexSDNode *FI =
-          dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
-        FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex());
+    // We don't need to do anything else for unused arguments.
+    if (ArgValues.empty())
+      continue;
  
-    if (!I->use_empty()) {
-      SDValue Res;
-      if (!ArgValues.empty())
-        Res = DAG.getMergeValues(&ArgValues[0], NumValues,
-                                 SDB->getCurDebugLoc());
-      SDB->setValue(I, Res);
+    // Note down frame index.
+    if (FrameIndexSDNode *FI =
+       dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
+      FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
+
+    SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues,
+                                     SDB->getCurDebugLoc());
+
+    SDB->setValue(I, Res);
+    if (!EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
+      if (LoadSDNode *LNode = 
+          dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
+        if (FrameIndexSDNode *FI =
+            dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
+        FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
+    }
  
-      // If this argument is live outside of the entry block, insert a copy from
-      // whereever we got it to the vreg that other BB's will reference it as.
+    // If this argument is live outside of the entry block, insert a copy from
+    // wherever we got it to the vreg that other BB's will reference it as.
+    if (!EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
+      // If we can, though, try to skip creating an unnecessary vreg.
+      // FIXME: This isn't very clean... it would be nice to make this more
+      // general.  It's also subtly incompatible with the hacks FastISel
+      // uses with vregs.
+      unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+        FuncInfo->ValueMap[I] = Reg;
+        continue;
+      }
+    }
+    if (!isOnlyUsedInEntryBlock(I)) {
+      FuncInfo->InitializeRegForValue(I);
        SDB->CopyToExportRegsIfNeeded(I);
      }
    }
@@ -6459,6 +6818,10 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
        // Ignore dead phi's.
        if (PN->use_empty()) continue;
  
+      // Skip empty types
+      if (PN->getType()->isEmptyTy())
+        continue;
+
        unsigned Reg;
        const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);