Add support for vectors of pointers.

[oota-llvm.git] / lib / CodeGen / SelectionDAG / SelectionDAGBuilder.cpp
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

index a01e2b86c2b7389b4313deb8a9fa30d5a44df6f3..ac24ce5868e990051afe08489f4d93bc99bc4dba 100644 (file)
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -41,7 +41,6 @@
  #include "llvm/CodeGen/MachineJumpTableInfo.h"
  #include "llvm/CodeGen/MachineModuleInfo.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/Analysis/DebugInfo.h"
  #include "llvm/Target/TargetData.h"
@@ -280,12 +279,35 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
      }
  
      // Vector/Vector bitcast.
-    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+    if (ValueVT.getSizeInBits() == PartVT.getSizeInBits())
+      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+    assert(PartVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
+      "Cannot handle this kind of promotion");
+    // Promoted vector extract
+    bool Smaller = ValueVT.bitsLE(PartVT);
+    return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+                       DL, ValueVT, Val);
+
    }
  
-  assert(ValueVT.getVectorElementType() == PartVT &&
-         ValueVT.getVectorNumElements() == 1 &&
+  // Trivial bitcast if the types are the same size and the destination
+  // vector type is legal.
+  if (PartVT.getSizeInBits() == ValueVT.getSizeInBits() &&
+      TLI.isTypeLegal(ValueVT))
+    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+  // Handle cases such as i8 -> <1 x i1>
+  assert(ValueVT.getVectorNumElements() == 1 &&
           "Only trivial scalar-to-vector conversions should get here!");
+
+  if (ValueVT.getVectorNumElements() == 1 &&
+      ValueVT.getVectorElementType() != PartVT) {
+    bool Smaller = ValueVT.bitsLE(PartVT);
+    Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+                       DL, ValueVT.getScalarType(), Val);
+  }
+
    return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
  }
  
@@ -426,7 +448,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
        // Bitconvert vector->vector case.
        Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
      } else if (PartVT.isVector() &&
-               PartVT.getVectorElementType() == ValueVT.getVectorElementType()&&
+               PartVT.getVectorElementType() == ValueVT.getVectorElementType() &&
                 PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
        EVT ElementVT = PartVT.getVectorElementType();
        // Vector widening case, e.g. <2 x float> -> <4 x float>.  Shuffle in
@@ -446,13 +468,25 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
  
        //SDValue UndefElts = DAG.getUNDEF(VectorTy);
        //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
-    } else {
+    } else if (PartVT.isVector() &&
+               PartVT.getVectorElementType().bitsGE(
+                 ValueVT.getVectorElementType()) &&
+               PartVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
+
+      // Promoted vector extract
+      bool Smaller = PartVT.bitsLE(ValueVT);
+      Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+                        DL, PartVT, Val);
+    } else{
        // Vector -> scalar conversion.
-      assert(ValueVT.getVectorElementType() == PartVT &&
-             ValueVT.getVectorNumElements() == 1 &&
+      assert(ValueVT.getVectorNumElements() == 1 &&
               "Only trivial vector-to-scalar conversions should get here!");
        Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
                          PartVT, Val, DAG.getIntPtrConstant(0));
+
+      bool Smaller = ValueVT.bitsLE(PartVT);
+      Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+                         DL, PartVT, Val);
      }
  
      Parts[0] = Val;
@@ -543,7 +577,7 @@ namespace {
        : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
  
      RegsForValue(LLVMContext &Context, const TargetLowering &tli,
-                 unsigned Reg, const Type *Ty) {
+                 unsigned Reg, Type *Ty) {
        ComputeValueVTs(tli, Ty, ValueVTs);
  
        for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
@@ -753,6 +787,18 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
    unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
    if (HasMatching)
      Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
+  else if (!Regs.empty() &&
+           TargetRegisterInfo::isVirtualRegister(Regs.front())) {
+    // Put the register class of the virtual registers in the flag word.  That
+    // way, later passes can recompute register class constraints for inline
+    // assembly as well as normal instructions.
+    // Don't do this for tied operands that can use the regclass information
+    // from the def.
+    const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+    const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
+    Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
+  }
+
    SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
    Ops.push_back(Res);
  
@@ -770,6 +816,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
    AA = &aa;
    GFI = gfi;
    TD = DAG.getTarget().getTargetData();
+  LPadToCallSiteMap.clear();
  }
  
  /// clear - Clear out the current SelectionDAG and the associated
@@ -783,11 +830,20 @@ void SelectionDAGBuilder::clear() {
    UnusedArgNodeMap.clear();
    PendingLoads.clear();
    PendingExports.clear();
-  DanglingDebugInfoMap.clear();
    CurDebugLoc = DebugLoc();
    HasTailCall = false;
  }
  
+/// clearDanglingDebugInfo - Clear the dangling debug information
+/// map. This function is separated from clear() so that debug
+/// information that is dangling in a basic block can be properly
+/// resolved in a different basic block. This allows the
+/// SelectionDAG to resolve dangling debug information attached
+/// to PHI nodes.
+void SelectionDAGBuilder::clearDanglingDebugInfo() {
+  DanglingDebugInfoMap.clear();
+}
+
  /// getRoot - Return the current virtual root of the Selection DAG,
  /// flushing any PendingLoad items. This must be done before emitting
  /// a store or any other node that may need to be ordered after any
@@ -912,7 +968,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
    }
  }
  
-// getValue - Return an SDValue for the given Value.
+/// getValue - Return an SDValue for the given Value.
  SDValue SelectionDAGBuilder::getValue(const Value *V) {
    // If we already have an SDValue for this value, use it. It's important
    // to do this first, so that we don't create a CopyFromReg if we already
@@ -927,7 +983,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
      unsigned InReg = It->second;
      RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
      SDValue Chain = DAG.getEntryNode();
-    N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL);
+    N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
      resolveDanglingDebugInfo(V, N);
      return N;
    }
@@ -1025,7 +1081,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
      if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
        return DAG.getBlockAddress(BA, VT);
  
-    const VectorType *VecTy = cast<VectorType>(V->getType());
+    VectorType *VecTy = cast<VectorType>(V->getType());
      unsigned NumElements = VecTy->getNumElements();
  
      // Now that we know the number and type of the elements, get that number of
@@ -1175,6 +1231,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
  /// created for it, emit nodes to copy the value into the virtual
  /// registers.
  void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
+  // Skip empty types
+  if (V->getType()->isEmptyTy())
+    return;
+
    DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
    if (VMI != FuncInfo.ValueMap.end()) {
      assert(!V->use_empty() && "Unused value assigned virtual registers!");
@@ -1223,6 +1283,26 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
    return true;
  }
  
+/// Return the BPI edge weight for the IR blocks of Src/Dst (0 if no BPI).
+uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src,
+                                            MachineBasicBlock *Dst) {
+  BranchProbabilityInfo *BPI = FuncInfo.BPI;
+  if (!BPI)
+    return 0;
+  const BasicBlock *SrcBB = Src->getBasicBlock();
+  const BasicBlock *DstBB = Dst->getBasicBlock();
+  return BPI->getEdgeWeight(SrcBB, DstBB);
+}
+
+void SelectionDAGBuilder::
+addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
+                       uint32_t Weight /* = 0 */) {
+  if (!Weight)
+    Weight = getEdgeWeight(Src, Dst);
+  Src->addSuccessor(Dst, Weight);
+}
+
+
  static bool InBlock(const Value *V, const BasicBlock *BB) {
    if (const Instruction *I = dyn_cast<Instruction>(V))
      return I->getParent() == BB;
@@ -1255,6 +1335,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
          Condition = getICmpCondCode(IC->getPredicate());
        } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
          Condition = getFCmpCondCode(FC->getPredicate());
+        if (TM.Options.NoNaNsFPMath)
+          Condition = getFCmpCodeWithoutNaN(Condition);
        } else {
          Condition = ISD::SETEQ; // silence warning.
          llvm_unreachable("Unknown compare instruction");
@@ -1492,8 +1574,8 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
    }
  
    // Update successor info
-  SwitchBB->addSuccessor(CB.TrueBB);
-  SwitchBB->addSuccessor(CB.FalseBB);
+  addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight);
+  addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
  
    // Set NextBlock to be the MBB immediately after the current one, if any.
    // This is used to avoid emitting unnecessary branches to the next block.
@@ -1611,7 +1693,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
      UsePtrType = true;
    else {
      for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
-      if ((uint64_t)((int64_t)B.Cases[i].Mask >> VT.getSizeInBits()) + 1 >= 2) {
+      if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
          // Switch table case range are encoded into series of masks.
          // Just use pointer type, it's guaranteed to fit.
          UsePtrType = true;
@@ -1637,8 +1719,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
  
    MachineBasicBlock* MBB = B.Cases[0].ThisBB;
  
-  SwitchBB->addSuccessor(B.Default);
-  SwitchBB->addSuccessor(MBB);
+  addSuccessorWithWeight(SwitchBB, B.Default);
+  addSuccessorWithWeight(SwitchBB, MBB);
  
    SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
                                  MVT::Other, CopyTo, RangeCmp,
@@ -1661,7 +1743,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
    SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
                                         Reg, VT);
    SDValue Cmp;
-  if (CountPopulation_64(B.Mask) == 1) {
+  unsigned PopCount = CountPopulation_64(B.Mask);
+  if (PopCount == 1) {
      // Testing for a single bit; just compare the shift count with what it
      // would need to be to shift a 1 bit in that position.
      Cmp = DAG.getSetCC(getCurDebugLoc(),
@@ -1669,6 +1752,13 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
                         ShiftOp,
                         DAG.getConstant(CountTrailingZeros_64(B.Mask), VT),
                         ISD::SETEQ);
+  } else if (PopCount == BB.Range) {
+    // There is only one zero bit in the range, test for it directly.
+    Cmp = DAG.getSetCC(getCurDebugLoc(),
+                       TLI.getSetCCResultType(VT),
+                       ShiftOp,
+                       DAG.getConstant(CountTrailingOnes_64(B.Mask), VT),
+                       ISD::SETNE);
    } else {
      // Make desired shift
      SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT,
@@ -1683,8 +1773,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
                         ISD::SETNE);
    }
  
-  SwitchBB->addSuccessor(B.TargetBB);
-  SwitchBB->addSuccessor(NextMBB);
+  addSuccessorWithWeight(SwitchBB, B.TargetBB);
+  addSuccessorWithWeight(SwitchBB, NextMBB);
  
    SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
                                MVT::Other, getControlRoot(),
@@ -1722,8 +1812,8 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
    CopyToExportRegsIfNeeded(&I);
  
    // Update successor info
-  InvokeMBB->addSuccessor(Return);
-  InvokeMBB->addSuccessor(LandingPad);
+  addSuccessorWithWeight(InvokeMBB, Return);
+  addSuccessorWithWeight(InvokeMBB, LandingPad);
  
    // Drop into normal successor.
    DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
@@ -1734,6 +1824,49 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
  void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) {
  }
  
+void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
+  llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
+}
+
+void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
+  assert(FuncInfo.MBB->isLandingPad() &&
+         "Call to landingpad not in landing pad!");
+
+  MachineBasicBlock *MBB = FuncInfo.MBB;
+  MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+  AddLandingPadInfo(LP, MMI, MBB);
+
+  SmallVector<EVT, 2> ValueVTs;
+  ComputeValueVTs(TLI, LP.getType(), ValueVTs);
+
+  // Insert the EXCEPTIONADDR instruction.
+  assert(FuncInfo.MBB->isLandingPad() &&
+         "Call to eh.exception not in landing pad!");
+  SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+  SDValue Ops[2];
+  Ops[0] = DAG.getRoot();
+  SDValue Op1 = DAG.getNode(ISD::EXCEPTIONADDR, getCurDebugLoc(), VTs, Ops, 1);
+  SDValue Chain = Op1.getValue(1);
+
+  // Insert the EHSELECTION instruction.
+  VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+  Ops[0] = Op1;
+  Ops[1] = Chain;
+  SDValue Op2 = DAG.getNode(ISD::EHSELECTION, getCurDebugLoc(), VTs, Ops, 2);
+  Chain = Op2.getValue(1);
+  Op2 = DAG.getSExtOrTrunc(Op2, getCurDebugLoc(), MVT::i32);
+
+  Ops[0] = Op1;
+  Ops[1] = Op2;
+  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                            DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
+                            &Ops[0], 2);
+
+  std::pair<SDValue, SDValue> RetPair = std::make_pair(Res, Chain);
+  setValue(&LP, RetPair.first);
+  DAG.setRoot(RetPair.second);
+}
+
  /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for
  /// small case ranges).
  bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
@@ -1792,8 +1925,8 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
                                      ISD::SETEQ);
  
          // Update successor info.
-        SwitchBB->addSuccessor(Small.BB);
-        SwitchBB->addSuccessor(Default);
+        addSuccessorWithWeight(SwitchBB, Small.BB);
+        addSuccessorWithWeight(SwitchBB, Default);
  
          // Insert the true branch.
          SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
@@ -1849,7 +1982,11 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
        CC = ISD::SETLE;
        LHS = I->Low; MHS = SV; RHS = I->High;
      }
-    CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock);
+
+    uint32_t ExtraWeight = I->ExtraWeight;
+    CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough,
+                 /* me */ CurBlock,
+                 /* trueweight */ ExtraWeight / 2, /* falseweight */ ExtraWeight / 2);
  
      // If emitting the first comparison, just call visitSwitchCase to emit the
      // code into the current block.  Otherwise, push the CaseBlock onto the
@@ -1867,7 +2004,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
  }
  
  static inline bool areJTsAllowed(const TargetLowering &TLI) {
-  return !DisableJumpTables &&
+  return !TLI.getTargetMachine().Options.DisableJumpTables &&
            (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
             TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
  }
@@ -1879,10 +2016,10 @@ static APInt ComputeRange(const APInt &First, const APInt &Last) {
  }
  
  /// handleJTSwitchCase - Emit jumptable for current switch case range
-bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
-                                             CaseRecVector& WorkList,
-                                             const Value* SV,
-                                             MachineBasicBlock* Default,
+bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
+                                             CaseRecVector &WorkList,
+                                             const Value *SV,
+                                             MachineBasicBlock *Default,
                                               MachineBasicBlock *SwitchBB) {
    Case& FrontCase = *CR.Range.first;
    Case& BackCase  = *(CR.Range.second-1);
@@ -1891,22 +2028,24 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
    const APInt &Last  = cast<ConstantInt>(BackCase.High)->getValue();
  
    APInt TSize(First.getBitWidth(), 0);
-  for (CaseItr I = CR.Range.first, E = CR.Range.second;
-       I!=E; ++I)
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
      TSize += I->size();
  
    if (!areJTsAllowed(TLI) || TSize.ult(4))
      return false;
  
    APInt Range = ComputeRange(First, Last);
-  double Density = TSize.roundToDouble() / Range.roundToDouble();
-  if (Density < 0.4)
+  // The density is TSize / Range. Require at least 40%.
+  // It should not be possible for IntTSize to saturate for sane code, but make
+  // sure we handle Range saturation correctly.
+  uint64_t IntRange = Range.getLimitedValue(UINT64_MAX/10);
+  uint64_t IntTSize = TSize.getLimitedValue(UINT64_MAX/10);
+  if (IntTSize * 10 < IntRange * 4)
      return false;
  
    DEBUG(dbgs() << "Lowering jump table\n"
                 << "First entry: " << First << ". Last entry: " << Last << '\n'
-               << "Range: " << Range
-               << ". Size: " << TSize << ". Density: " << Density << "\n\n");
+               << "Range: " << Range << ". Size: " << TSize << ".\n\n");
  
    // Get the MachineFunction which holds the current MBB.  This is used when
    // inserting any additional MBBs necessary to represent the switch.
@@ -1924,8 +2063,9 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
    // table.
    MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
    CurMF->insert(BBI, JumpTableBB);
-  CR.CaseBB->addSuccessor(Default);
-  CR.CaseBB->addSuccessor(JumpTableBB);
+
+  addSuccessorWithWeight(CR.CaseBB, Default);
+  addSuccessorWithWeight(CR.CaseBB, JumpTableBB);
  
    // Build a vector of destination BBs, corresponding to each target
    // of the jump table. If the value of the jump table slot corresponds to
@@ -1952,7 +2092,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
           E = DestBBs.end(); I != E; ++I) {
      if (!SuccsHandled[(*I)->getNumber()]) {
        SuccsHandled[(*I)->getNumber()] = true;
-      JumpTableBB->addSuccessor(*I);
+      addSuccessorWithWeight(JumpTableBB, *I);
      }
    }
  
@@ -1969,7 +2109,6 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
      visitJumpTableHeader(JT, JTH, SwitchBB);
  
    JTCases.push_back(JumpTableBlock(JTH, JT));
-
    return true;
  }
  
@@ -2243,12 +2382,17 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
                                         const SwitchInst& SI) {
    size_t numCmps = 0;
  
+  BranchProbabilityInfo *BPI = FuncInfo.BPI;
    // Start with "simple" cases
    for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
-    MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
+    BasicBlock *SuccBB = SI.getSuccessor(i);
+    MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
+
+    uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0;
+
      Cases.push_back(Case(SI.getSuccessorValue(i),
                           SI.getSuccessorValue(i),
-                         SMBB));
+                         SMBB, ExtraWeight));
    }
    std::sort(Cases.begin(), Cases.end(), CaseCmp());
  
@@ -2268,6 +2412,16 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
        if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
          I->High = J->High;
          J = Cases.erase(J);
+
+        if (BranchProbabilityInfo *BPI = FuncInfo.BPI) {
+          uint32_t CurWeight = currentBB->getBasicBlock() ?
+            BPI->getEdgeWeight(SI.getParent(), currentBB->getBasicBlock()) : 16;
+          uint32_t NextWeight = nextBB->getBasicBlock() ?
+            BPI->getEdgeWeight(SI.getParent(), nextBB->getBasicBlock()) : 16;
+
+          BPI->setEdgeWeight(SI.getParent(), currentBB->getBasicBlock(),
+                             CurWeight + NextWeight);
+        }
        } else {
          I = J++;
        }
@@ -2304,7 +2458,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
  
    // If there is only the default destination, branch to it if it is not the
    // next basic block.  Otherwise, just fall through.
-  if (SI.getNumOperands() == 2) {
+  if (SI.getNumCases() == 1) {
      // Update machine-CFG edges.
  
      // If this is not a fall-through branch, emit the branch.
@@ -2324,12 +2478,12 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
    size_t numCmps = Clusterify(Cases, SI);
    DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
                 << ". Total compares: " << numCmps << '\n');
-  numCmps = 0;
+  (void)numCmps;
  
    // Get the Value to be switched on and default basic blocks, which will be
    // inserted into CaseBlock records, representing basic blocks in the binary
    // search tree.
-  const Value *SV = SI.getOperand(0);
+  const Value *SV = SI.getCondition();
  
    // Push the initial CaseRec onto the worklist
    CaseRecVector WorkList;
@@ -2371,8 +2525,10 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
      succs.push_back(I.getSuccessor(i));
    array_pod_sort(succs.begin(), succs.end());
    succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
-  for (unsigned i = 0, e = succs.size(); i != e; ++i)
-    IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]);
+  for (unsigned i = 0, e = succs.size(); i != e; ++i) {
+    MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]];
+    addSuccessorWithWeight(IndirectBrMBB, Succ);
+  }
  
    DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
                            MVT::Other, getControlRoot(),
@@ -2381,7 +2537,7 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
  
  void SelectionDAGBuilder::visitFSub(const User &I) {
    // -0.0 - X --> fneg
-  const Type *Ty = I.getType();
+  Type *Ty = I.getType();
    if (isa<Constant>(I.getOperand(0)) &&
        I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
      SDValue Op2 = getValue(I.getOperand(1));
@@ -2432,6 +2588,22 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
                             Op1.getValueType(), Op1, Op2));
  }
  
+void SelectionDAGBuilder::visitSDiv(const User &I) {
+  SDValue Op1 = getValue(I.getOperand(0));
+  SDValue Op2 = getValue(I.getOperand(1));
+
+  // Turn exact SDivs into multiplications.
+  // FIXME: This should be in DAGCombiner, but it doesn't have access to the
+  // exact bit.
+  if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() &&
+      !isa<ConstantSDNode>(Op1) &&
+      isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue())
+    setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurDebugLoc(), DAG));
+  else
+    setValue(&I, DAG.getNode(ISD::SDIV, getCurDebugLoc(), Op1.getValueType(),
+                             Op1, Op2));
+}
+
  void SelectionDAGBuilder::visitICmp(const User &I) {
    ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
    if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
@@ -2455,6 +2627,8 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
    SDValue Op1 = getValue(I.getOperand(0));
    SDValue Op2 = getValue(I.getOperand(1));
    ISD::CondCode Condition = getFCmpCondCode(predicate);
+  if (TM.Options.NoNaNsFPMath)
+    Condition = getFCmpCodeWithoutNaN(Condition);
    EVT DestVT = TLI.getValueType(I.getType());
    setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
  }
@@ -2469,10 +2643,12 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
    SDValue Cond     = getValue(I.getOperand(0));
    SDValue TrueVal  = getValue(I.getOperand(1));
    SDValue FalseVal = getValue(I.getOperand(2));
+  ISD::NodeType OpCode = Cond.getValueType().isVector() ?
+    ISD::VSELECT : ISD::SELECT;
  
    for (unsigned i = 0; i != NumValues; ++i)
-    Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(),
-                          TrueVal.getNode()->getValueType(TrueVal.getResNo()+i),
+    Values[i] = DAG.getNode(OpCode, getCurDebugLoc(),
+                            TrueVal.getNode()->getValueType(TrueVal.getResNo()+i),
                              Cond,
                              SDValue(TrueVal.getNode(),
                                      TrueVal.getResNo() + i),
@@ -2516,7 +2692,7 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) {
  }
  
  void SelectionDAGBuilder::visitFPExt(const User &I){
-  // FPTrunc is never a no-op cast, no need to check
+  // FPExt is never a no-op cast, no need to check
    SDValue N = getValue(I.getOperand(0));
    EVT DestVT = TLI.getValueType(I.getType());
    setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
@@ -2685,7 +2861,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
      // Analyze the access pattern of the vector to see if we can extract
      // two subvectors and do the shuffle. The analysis is done by calculating
      // the range of elements the mask access on both vectors.
-    int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
+    int MinRange[2] = { static_cast<int>(SrcNumElts+1),
+                        static_cast<int>(SrcNumElts+1)};
      int MaxRange[2] = {-1, -1};
  
      for (unsigned i = 0; i != MaskNumElts; ++i) {
@@ -2793,12 +2970,12 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
  void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
    const Value *Op0 = I.getOperand(0);
    const Value *Op1 = I.getOperand(1);
-  const Type *AggTy = I.getType();
-  const Type *ValTy = Op1->getType();
+  Type *AggTy = I.getType();
+  Type *ValTy = Op1->getType();
    bool IntoUndef = isa<UndefValue>(Op0);
    bool FromUndef = isa<UndefValue>(Op1);
  
-  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end());
+  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
  
    SmallVector<EVT, 4> AggValueVTs;
    ComputeValueVTs(TLI, AggTy, AggValueVTs);
@@ -2810,16 +2987,18 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
    SmallVector<SDValue, 4> Values(NumAggValues);
  
    SDValue Agg = getValue(Op0);
-  SDValue Val = getValue(Op1);
    unsigned i = 0;
    // Copy the beginning value(s) from the original aggregate.
    for (; i != LinearIndex; ++i)
      Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
                  SDValue(Agg.getNode(), Agg.getResNo() + i);
    // Copy values from the inserted value(s).
-  for (; i != LinearIndex + NumValValues; ++i)
-    Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
-                SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+  if (NumValValues) {
+    SDValue Val = getValue(Op1);
+    for (; i != LinearIndex + NumValValues; ++i)
+      Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+                  SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+  }
    // Copy remaining value(s) from the original aggregate.
    for (; i != NumAggValues; ++i)
      Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
@@ -2832,16 +3011,23 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
  
  void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
    const Value *Op0 = I.getOperand(0);
-  const Type *AggTy = Op0->getType();
-  const Type *ValTy = I.getType();
+  Type *AggTy = Op0->getType();
+  Type *ValTy = I.getType();
    bool OutOfUndef = isa<UndefValue>(Op0);
  
-  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end());
+  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
  
    SmallVector<EVT, 4> ValValueVTs;
    ComputeValueVTs(TLI, ValTy, ValValueVTs);
  
    unsigned NumValValues = ValValueVTs.size();
+
+  // Ignore an extractvalue that produces an empty object
+  if (!NumValValues) {
+    setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
+    return;
+  }
+
    SmallVector<SDValue, 4> Values(NumValValues);
  
    SDValue Agg = getValue(Op0);
@@ -2859,12 +3045,12 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
  
  void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
    SDValue N = getValue(I.getOperand(0));
-  const Type *Ty = I.getOperand(0)->getType();
+  Type *Ty = I.getOperand(0)->getType();
  
    for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
         OI != E; ++OI) {
      const Value *Idx = *OI;
-    if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+    if (StructType *StTy = dyn_cast<StructType>(Ty)) {
        unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
        if (Field) {
          // N = N + Offset
@@ -2913,7 +3099,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
            unsigned Amt = ElementSize.logBase2();
            IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
                               N.getValueType(), IdxN,
-                             DAG.getConstant(Amt, TLI.getPointerTy()));
+                             DAG.getConstant(Amt, IdxN.getValueType()));
          } else {
            SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
            IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
@@ -2935,7 +3121,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
    if (FuncInfo.StaticAllocaMap.count(&I))
      return;   // getValue will auto-populate this.
  
-  const Type *Ty = I.getAllocatedType();
+  Type *Ty = I.getAllocatedType();
    uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
    unsigned Align =
      std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
@@ -2982,13 +3168,17 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
  }
  
  void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
+  if (I.isAtomic())
+    return visitAtomicLoad(I);
+
    const Value *SV = I.getOperand(0);
    SDValue Ptr = getValue(SV);
  
-  const Type *Ty = I.getType();
+  Type *Ty = I.getType();
  
    bool isVolatile = I.isVolatile();
    bool isNonTemporal = I.getMetadata("nontemporal") != 0;
+  bool isInvariant = I.getMetadata("invariant.load") != 0;
    unsigned Alignment = I.getAlignment();
    const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
  
@@ -3038,7 +3228,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
                              DAG.getConstant(Offsets[i], PtrVT));
      SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
                              A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
-                            isNonTemporal, Alignment, TBAAInfo);
+                            isNonTemporal, isInvariant, Alignment, TBAAInfo);
  
      Values[i] = L;
      Chains[ChainI] = L.getValue(1);
@@ -3059,6 +3249,9 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
  }
  
  void SelectionDAGBuilder::visitStore(const StoreInst &I) {
+  if (I.isAtomic())
+    return visitAtomicStore(I);
+
    const Value *SrcV = I.getOperand(0);
    const Value *PtrV = I.getOperand(1);
  
@@ -3109,6 +3302,179 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
    DAG.setRoot(StoreNode);
  }
  
+/// InsertFenceForAtomic - Append an ATOMIC_FENCE to Chain when Order calls
+/// for one on the requested side of an atomic operation.  Before selects the
+/// fence placed ahead of the operation (true) or behind it (false).  Returns
+/// the fence node, or Chain unchanged when no fence is emitted.
+static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
+                                    SynchronizationScope Scope,
+                                    bool Before, DebugLoc dl,
+                                    SelectionDAG &DAG,
+                                    const TargetLowering &TLI) {
+  switch (Order) {
+  case Monotonic:
+    // No fence on either side.
+    return Chain;
+  case Acquire:
+    // Only a trailing fence.
+    if (Before)
+      return Chain;
+    break;
+  case Release:
+    // Only a leading fence.
+    if (!Before)
+      return Chain;
+    break;
+  case AcquireRelease:
+    // Split into a leading release fence and a trailing acquire fence.
+    Order = Before ? Release : Acquire;
+    break;
+  case SequentiallyConsistent:
+    // The leading fence is weakened to release; the trailing one is kept.
+    if (Before)
+      Order = Release;
+    break;
+  default:
+    // Every other ordering is handed to the fence unchanged.
+    break;
+  }
+
+  // Operands: incoming chain, ordering, synchronization scope.
+  SDValue FenceOps[3];
+  FenceOps[0] = Chain;
+  FenceOps[1] = DAG.getConstant(Order, TLI.getPointerTy());
+  FenceOps[2] = DAG.getConstant(Scope, TLI.getPointerTy());
+  return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, FenceOps, 3);
+}
+
+/// visitAtomicCmpXchg - Lower a cmpxchg instruction to an ATOMIC_CMP_SWAP
+/// node.  When the target asks for explicit fences, the node itself is
+/// emitted as Monotonic and InsertFenceForAtomic is consulted on both sides
+/// of it.  Result 0 of the node becomes the value of the instruction and
+/// the outgoing chain becomes the DAG root.
+void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
+  DebugLoc DL = getCurDebugLoc();
+  const AtomicOrdering Ordering = I.getOrdering();
+  const SynchronizationScope SynchScope = I.getSynchScope();
+  const bool UseFences = TLI.getInsertFencesForAtomic();
+
+  // Leading fence, if any, hangs off the current root.
+  SDValue Chain = getRoot();
+  if (UseFences)
+    Chain = InsertFenceForAtomic(Chain, Ordering, SynchScope, true, DL,
+                                 DAG, TLI);
+
+  SDValue Ptr = getValue(I.getPointerOperand());
+  SDValue Cmp = getValue(I.getCompareOperand());
+  SDValue NewVal = getValue(I.getNewValOperand());
+
+  // The memory type is taken from the compare operand.
+  SDValue Swap =
+    DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, DL,
+                  Cmp.getValueType().getSimpleVT(),
+                  Chain, Ptr, Cmp, NewVal,
+                  MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */,
+                  UseFences ? Monotonic : Ordering,
+                  SynchScope);
+
+  // Trailing fence, if any, hangs off the chain result of the swap.
+  SDValue NewRoot = Swap.getValue(1);
+  if (UseFences)
+    NewRoot = InsertFenceForAtomic(NewRoot, Ordering, SynchScope, false, DL,
+                                   DAG, TLI);
+
+  setValue(&I, Swap);
+  DAG.setRoot(NewRoot);
+}
+
+/// visitAtomicRMW - Lower an atomicrmw instruction to the ATOMIC_SWAP or
+/// ATOMIC_LOAD_* node matching its operation.  When the target asks for
+/// explicit fences, the node itself is emitted as Monotonic and
+/// InsertFenceForAtomic is consulted on both sides of it.  Result 0 of the
+/// node becomes the value of the instruction and the outgoing chain becomes
+/// the DAG root.
+void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
+  DebugLoc DL = getCurDebugLoc();
+
+  // Map the IR operation onto its DAG opcode.
+  ISD::NodeType Opcode;
+  switch (I.getOperation()) {
+  default: llvm_unreachable("Unknown atomicrmw operation"); return;
+  case AtomicRMWInst::Xchg: Opcode = ISD::ATOMIC_SWAP; break;
+  case AtomicRMWInst::Add:  Opcode = ISD::ATOMIC_LOAD_ADD; break;
+  case AtomicRMWInst::Sub:  Opcode = ISD::ATOMIC_LOAD_SUB; break;
+  case AtomicRMWInst::And:  Opcode = ISD::ATOMIC_LOAD_AND; break;
+  case AtomicRMWInst::Nand: Opcode = ISD::ATOMIC_LOAD_NAND; break;
+  case AtomicRMWInst::Or:   Opcode = ISD::ATOMIC_LOAD_OR; break;
+  case AtomicRMWInst::Xor:  Opcode = ISD::ATOMIC_LOAD_XOR; break;
+  case AtomicRMWInst::Max:  Opcode = ISD::ATOMIC_LOAD_MAX; break;
+  case AtomicRMWInst::Min:  Opcode = ISD::ATOMIC_LOAD_MIN; break;
+  case AtomicRMWInst::UMax: Opcode = ISD::ATOMIC_LOAD_UMAX; break;
+  case AtomicRMWInst::UMin: Opcode = ISD::ATOMIC_LOAD_UMIN; break;
+  }
+
+  const AtomicOrdering Ordering = I.getOrdering();
+  const SynchronizationScope SynchScope = I.getSynchScope();
+  const bool UseFences = TLI.getInsertFencesForAtomic();
+
+  // Leading fence, if any, hangs off the current root.
+  SDValue Chain = getRoot();
+  if (UseFences)
+    Chain = InsertFenceForAtomic(Chain, Ordering, SynchScope, true, DL,
+                                 DAG, TLI);
+
+  SDValue Ptr = getValue(I.getPointerOperand());
+  SDValue Val = getValue(I.getValOperand());
+
+  // The memory type is taken from the value operand.
+  SDValue RMW =
+    DAG.getAtomic(Opcode, DL,
+                  Val.getValueType().getSimpleVT(),
+                  Chain, Ptr, Val,
+                  I.getPointerOperand(), 0 /* Alignment */,
+                  UseFences ? Monotonic : Ordering,
+                  SynchScope);
+
+  // Trailing fence, if any, hangs off the chain result of the operation.
+  SDValue NewRoot = RMW.getValue(1);
+  if (UseFences)
+    NewRoot = InsertFenceForAtomic(NewRoot, Ordering, SynchScope, false, DL,
+                                   DAG, TLI);
+
+  setValue(&I, RMW);
+  DAG.setRoot(NewRoot);
+}
+
+/// visitFence - Lower a fence instruction to an ATOMIC_FENCE node chained
+/// onto the current root; the new node becomes the DAG root.
+void SelectionDAGBuilder::visitFence(const FenceInst &I) {
+  DebugLoc DL = getCurDebugLoc();
+
+  // Operands: incoming chain, ordering, synchronization scope.
+  SDValue FenceOps[3];
+  FenceOps[0] = getRoot();
+  FenceOps[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy());
+  FenceOps[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy());
+
+  SDValue Fence = DAG.getNode(ISD::ATOMIC_FENCE, DL, MVT::Other, FenceOps, 3);
+  DAG.setRoot(Fence);
+}
+
+/// visitAtomicLoad - Lower an atomic load to an ATOMIC_LOAD node.  A load
+/// whose alignment, scaled by 8, is smaller than the size in bits of its
+/// type is rejected with a fatal error.  When the target asks for explicit
+/// fences, the node is emitted as Monotonic and InsertFenceForAtomic is
+/// consulted for a trailing fence.
+void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
+  DebugLoc DL = getCurDebugLoc();
+  const AtomicOrdering Ordering = I.getOrdering();
+  const SynchronizationScope SynchScope = I.getSynchScope();
+  const bool UseFences = TLI.getInsertFencesForAtomic();
+
+  SDValue Chain = getRoot();
+
+  // The loaded type serves as both the memory type and the result type.
+  EVT MemVT = EVT::getEVT(I.getType());
+
+  if (I.getAlignment() * 8 < MemVT.getSizeInBits())
+    report_fatal_error("Cannot generate unaligned atomic load");
+
+  SDValue Ptr = getValue(I.getPointerOperand());
+  SDValue Load =
+    DAG.getAtomic(ISD::ATOMIC_LOAD, DL, MemVT, MemVT, Chain, Ptr,
+                  I.getPointerOperand(), I.getAlignment(),
+                  UseFences ? Monotonic : Ordering,
+                  SynchScope);
+
+  // Trailing fence, if any, hangs off the chain result of the load.
+  SDValue NewRoot = Load.getValue(1);
+  if (UseFences)
+    NewRoot = InsertFenceForAtomic(NewRoot, Ordering, SynchScope, false, DL,
+                                   DAG, TLI);
+
+  setValue(&I, Load);
+  DAG.setRoot(NewRoot);
+}
+
+/// visitAtomicStore - Lower an atomic store to an ATOMIC_STORE node.  A
+/// store whose alignment, scaled by 8, is smaller than the size in bits of
+/// the stored type is rejected with a fatal error.  When the target asks
+/// for explicit fences, the node is emitted as Monotonic and
+/// InsertFenceForAtomic is consulted on both sides of it.  The outgoing
+/// chain becomes the DAG root.
+void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
+  DebugLoc DL = getCurDebugLoc();
+  const AtomicOrdering Ordering = I.getOrdering();
+  const SynchronizationScope SynchScope = I.getSynchScope();
+  const bool UseFences = TLI.getInsertFencesForAtomic();
+
+  SDValue Chain = getRoot();
+
+  // The memory type is the type of the value being stored.
+  EVT MemVT = EVT::getEVT(I.getValueOperand()->getType());
+
+  if (I.getAlignment() * 8 < MemVT.getSizeInBits())
+    report_fatal_error("Cannot generate unaligned atomic store");
+
+  // Leading fence, if any, hangs off the current root.
+  if (UseFences)
+    Chain = InsertFenceForAtomic(Chain, Ordering, SynchScope, true, DL,
+                                 DAG, TLI);
+
+  SDValue Ptr = getValue(I.getPointerOperand());
+  SDValue Val = getValue(I.getValueOperand());
+
+  // The store node is used directly as the outgoing chain.
+  SDValue NewRoot =
+    DAG.getAtomic(ISD::ATOMIC_STORE, DL, MemVT, Chain, Ptr, Val,
+                  I.getPointerOperand(), I.getAlignment(),
+                  UseFences ? Monotonic : Ordering,
+                  SynchScope);
+
+  // Trailing fence, if any, hangs off the store.
+  if (UseFences)
+    NewRoot = InsertFenceForAtomic(NewRoot, Ordering, SynchScope, false, DL,
+                                   DAG, TLI);
+
+  DAG.setRoot(NewRoot);
+}
+
  /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
  /// node.
  void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
@@ -3188,7 +3554,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
    }
  
    if (!I.getType()->isVoidTy()) {
-    if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
+    if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
        EVT VT = TLI.getValueType(PTy);
        Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result);
      }
@@ -3235,25 +3601,6 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) {
    return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
  }
  
-/// Inlined utility function to implement binary input atomic intrinsics for
-/// visitIntrinsicCall: I is a call instruction
-///                     Op is the associated NodeType for I
-const char *
-SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I,
-                                           ISD::NodeType Op) {
-  SDValue Root = getRoot();
-  SDValue L =
-    DAG.getAtomic(Op, getCurDebugLoc(),
-                  getValue(I.getArgOperand(1)).getValueType().getSimpleVT(),
-                  Root,
-                  getValue(I.getArgOperand(0)),
-                  getValue(I.getArgOperand(1)),
-                  I.getArgOperand(0));
-  setValue(&I, L);
-  DAG.setRoot(L.getValue(1));
-  return 0;
-}
-
  // implVisitAluOverflow - Lower arithmetic overflow instrinsics.
  const char *
  SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) {
@@ -4013,6 +4360,24 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
    return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
  }
  
+// getTruncatedArgReg - Find the underlying register used for a truncated
+// argument.  Walks a chain of TRUNCATE(AssertZext/AssertSext(...)) nodes
+// and returns the register read by the CopyFromReg at its bottom, or 0 if
+// N does not have that shape.
+static unsigned getTruncatedArgReg(const SDValue &N) {
+  SDValue Cur = N;
+  while (Cur.getOpcode() == ISD::TRUNCATE) {
+    // The truncate must sit directly on an extension assertion.
+    SDValue Asserted = Cur.getOperand(0);
+    unsigned AssertOpc = Asserted.getOpcode();
+    if (AssertOpc != ISD::AssertZext && AssertOpc != ISD::AssertSext)
+      return 0;
+
+    SDValue Inner = Asserted.getOperand(0);
+    if (Inner.getOpcode() == ISD::CopyFromReg)
+      return cast<RegisterSDNode>(Inner.getOperand(1))->getReg();
+
+    // Keep walking only if the asserted value is itself a truncate; the
+    // loop condition rejects everything else.
+    Cur = Inner;
+  }
+  return 0;
+}
+
  /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
  /// argument, create the corresponding DBG_VALUE machine instruction for it now.
  /// At the end of instruction selection, they will be inserted to the entry BB.
@@ -4034,19 +4399,17 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
      return false;
  
    unsigned Reg = 0;
-  if (Arg->hasByValAttr()) {
-    // Byval arguments' frame index is recorded during argument lowering.
-    // Use this info directly.
-    Reg = TRI->getFrameRegister(MF);
-    Offset = FuncInfo.getByValArgumentFrameIndex(Arg);
-    // If byval argument ofset is not recorded then ignore this.
-    if (!Offset)
-      Reg = 0;
-  }
-
-  if (N.getNode() && N.getOpcode() == ISD::CopyFromReg) {
-    Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
-    if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+  // Some arguments' frame index is recorded during argument lowering.
+  Offset = FuncInfo.getArgumentFrameIndex(Arg);
+  if (Offset)
+      Reg = TRI->getFrameRegister(MF);
+
+  if (!Reg && N.getNode()) {
+    if (N.getOpcode() == ISD::CopyFromReg)
+      Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
+    else
+      Reg = getTruncatedArgReg(N);
+    if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned PR = RegInfo.getLiveInPhysReg(Reg);
        if (PR)
@@ -4172,7 +4535,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
      MDNode *Variable = DI.getVariable();
      const Value *Address = DI.getAddress();
-    if (!Address || !DIVariable(DI.getVariable()).Verify())
+    if (!Address || !DIVariable(Variable).Verify())
        return 0;
  
      // Build an entry in DbgOrdering.  Debug info input nodes get an SDNodeOrder
@@ -4208,9 +4571,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
            SDV = DAG.getDbgValue(Variable, FINode->getIndex(),
                                  0, dl, SDNodeOrder);
          else {
-          // Can't do anything with other non-AI cases yet.  This might be a
-          // parameter of a callee function that got inlined, for example.
-          DEBUG(dbgs() << "Dropping debug info for " << DI);
+          // Address is an argument, so try to emit its dbg value using
+          // virtual register info from the FuncInfo.ValueMap.
+          EmitFuncArgumentDbgValue(Address, Variable, 0, N);
            return 0;
          }
        } else if (AI)
@@ -4262,7 +4625,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      // debug info exists.
      ++SDNodeOrder;
      SDDbgValue *SDV;
-    if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) {
+    if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) {
        SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
        DAG.AddDbgValue(SDV, 0, false);
      } else {
@@ -4391,9 +4754,24 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      MMI.setCurrentCallSite(CI->getZExtValue());
      return 0;
    }
+  case Intrinsic::eh_sjlj_functioncontext: {
+    // Get and store the index of the function context.
+    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+    AllocaInst *FnCtx =
+      cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
+    int FI = FuncInfo.StaticAllocaMap[FnCtx];
+    MFI->setFunctionContextIndex(FI);
+    return 0;
+  }
    case Intrinsic::eh_sjlj_setjmp: {
-    setValue(&I, DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, MVT::i32, getRoot(),
-                             getValue(I.getArgOperand(0))));
+    SDValue Ops[2];
+    Ops[0] = getRoot();
+    Ops[1] = getValue(I.getArgOperand(0));
+    SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, dl,
+                             DAG.getVTList(MVT::i32, MVT::Other),
+                             Ops, 2);
+    setValue(&I, Op.getValue(0));
+    DAG.setRoot(Op.getValue(1));
      return 0;
    }
    case Intrinsic::eh_sjlj_longjmp: {
@@ -4401,11 +4779,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                              getRoot(), getValue(I.getArgOperand(0))));
      return 0;
    }
-  case Intrinsic::eh_sjlj_dispatch_setup: {
-    DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
-                            getRoot()));
-    return 0;
-  }
  
    case Intrinsic::x86_mmx_pslli_w:
    case Intrinsic::x86_mmx_pslli_d:
@@ -4536,6 +4909,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
    case Intrinsic::pow:
      visitPow(I);
      return 0;
+  case Intrinsic::fma:
+    setValue(&I, DAG.getNode(ISD::FMA, dl,
+                             getValue(I.getArgOperand(0)).getValueType(),
+                             getValue(I.getArgOperand(0)),
+                             getValue(I.getArgOperand(1)),
+                             getValue(I.getArgOperand(2))));
+    return 0;
    case Intrinsic::convert_to_fp16:
      setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
                               MVT::i16, getValue(I.getArgOperand(0))));
@@ -4648,12 +5028,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
      Ops[5] = DAG.getSrcValue(F);
  
-    Res = DAG.getNode(ISD::TRAMPOLINE, dl,
-                      DAG.getVTList(TLI.getPointerTy(), MVT::Other),
-                      Ops, 6);
+    Res = DAG.getNode(ISD::INIT_TRAMPOLINE, dl, MVT::Other, Ops, 6);
  
-    setValue(&I, Res);
-    DAG.setRoot(Res.getValue(1));
+    DAG.setRoot(Res);
+    return 0;
+  }
+  case Intrinsic::adjust_trampoline: {
+    setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, dl,
+                             TLI.getPointerTy(),
+                             getValue(I.getArgOperand(0))));
      return 0;
    }
    case Intrinsic::gcroot:
@@ -4672,8 +5055,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
    case Intrinsic::flt_rounds:
      setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
      return 0;
+
+  case Intrinsic::expect: {
+    // Just replace __builtin_expect(exp, c) with EXP.
+    setValue(&I, getValue(I.getArgOperand(0)));
+    return 0;
+  }
+
    case Intrinsic::trap: {
-    StringRef TrapFuncName = getTrapFunctionName();
+    StringRef TrapFuncName = TM.Options.getTrapFunctionName();
      if (TrapFuncName.empty()) {
        DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
        return 0;
@@ -4702,15 +5092,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
      return implVisitAluOverflow(I, ISD::SMULO);
  
    case Intrinsic::prefetch: {
-    SDValue Ops[4];
+    SDValue Ops[5];
      unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
      Ops[0] = getRoot();
      Ops[1] = getValue(I.getArgOperand(0));
      Ops[2] = getValue(I.getArgOperand(1));
      Ops[3] = getValue(I.getArgOperand(2));
+    Ops[4] = getValue(I.getArgOperand(3));
      DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl,
                                          DAG.getVTList(MVT::Other),
-                                        &Ops[0], 4,
+                                        &Ops[0], 5,
                                          EVT::getIntegerVT(*Context, 8),
                                          MachinePointerInfo(I.getArgOperand(0)),
                                          0, /* align */
@@ -4719,51 +5110,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                                          rw==1)); /* write */
      return 0;
    }
-  case Intrinsic::memory_barrier: {
-    SDValue Ops[6];
-    Ops[0] = getRoot();
-    for (int x = 1; x < 6; ++x)
-      Ops[x] = getValue(I.getArgOperand(x - 1));
-
-    DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6));
-    return 0;
-  }
-  case Intrinsic::atomic_cmp_swap: {
-    SDValue Root = getRoot();
-    SDValue L =
-      DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(),
-                    getValue(I.getArgOperand(1)).getValueType().getSimpleVT(),
-                    Root,
-                    getValue(I.getArgOperand(0)),
-                    getValue(I.getArgOperand(1)),
-                    getValue(I.getArgOperand(2)),
-                    MachinePointerInfo(I.getArgOperand(0)));
-    setValue(&I, L);
-    DAG.setRoot(L.getValue(1));
-    return 0;
-  }
-  case Intrinsic::atomic_load_add:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD);
-  case Intrinsic::atomic_load_sub:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB);
-  case Intrinsic::atomic_load_or:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR);
-  case Intrinsic::atomic_load_xor:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR);
-  case Intrinsic::atomic_load_and:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND);
-  case Intrinsic::atomic_load_nand:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND);
-  case Intrinsic::atomic_load_max:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX);
-  case Intrinsic::atomic_load_min:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN);
-  case Intrinsic::atomic_load_umin:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN);
-  case Intrinsic::atomic_load_umax:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX);
-  case Intrinsic::atomic_swap:
-    return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP);
  
    case Intrinsic::invariant_start:
    case Intrinsic::lifetime_start:
@@ -4780,9 +5126,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
  void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
                                        bool isTailCall,
                                        MachineBasicBlock *LandingPad) {
-  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
-  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
-  const Type *RetTy = FTy->getReturnType();
+  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+  Type *RetTy = FTy->getReturnType();
    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
    MCSymbol *BeginLabel = 0;
  
@@ -4797,7 +5143,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
                  Outs, TLI, &Offsets);
  
    bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
-                        FTy->isVarArg(), Outs, FTy->getContext());
+                                          DAG.getMachineFunction(),
+                                          FTy->isVarArg(), Outs,
+                                          FTy->getContext());
  
    SDValue DemoteStackSlot;
    int DemoteStackIdx = -100;
@@ -4809,7 +5157,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
                        FTy->getReturnType());
      MachineFunction &MF = DAG.getMachineFunction();
      DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
-    const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
+    Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
  
      DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy());
      Entry.Node = DemoteStackSlot;
@@ -4827,8 +5175,14 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
  
    for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
         i != e; ++i) {
-    SDValue ArgNode = getValue(*i);
-    Entry.Node = ArgNode; Entry.Ty = (*i)->getType();
+    const Value *V = *i;
+
+    // Skip empty types
+    if (V->getType()->isEmptyTy())
+      continue;
+
+    SDValue ArgNode = getValue(V);
+    Entry.Node = ArgNode; Entry.Ty = V->getType();
  
      unsigned attrInd = i - CS.arg_begin() + 1;
      Entry.isSExt  = CS.paramHasAttr(attrInd, Attribute::SExt);
@@ -4851,6 +5205,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
      unsigned CallSiteIndex = MMI.getCurrentCallSite();
      if (CallSiteIndex) {
        MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
+      LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex);
+
        // Now that the call site is handled, stop tracking it.
        MMI.setCurrentCallSite(0);
      }
@@ -4869,7 +5225,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
  
    // If there's a possibility that fast-isel has already selected some amount
    // of the current basic block, don't emit a tail call.
-  if (isTailCall && EnableFastISel)
+  if (isTailCall && TM.Options.EnableFastISel)
      isTailCall = false;
  
    std::pair<SDValue,SDValue> Result =
@@ -4891,7 +5247,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
      // The instruction result is the result of loading from the
      // hidden sret parameter.
      SmallVector<EVT, 1> PVTs;
-    const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
+    Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
  
      ComputeValueVTs(TLI, PtrRetTy, PVTs);
      assert(PVTs.size() == 1 && "Pointers should fit in one register");
@@ -4907,7 +5263,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
        SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second,
                                Add,
                    MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
-                              false, false, 1);
+                              false, false, false, 1);
        Values[i] = L;
        Chains[i] = L.getValue(1);
      }
@@ -4984,7 +5340,7 @@ static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
  }
  
  static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
-                             const Type *LoadTy,
+                             Type *LoadTy,
                               SelectionDAGBuilder &Builder) {
  
    // Check to see if this load can be trivially constant folded, e.g. if the
@@ -5018,7 +5374,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
    SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
                                          Ptr, MachinePointerInfo(PtrVal),
                                          false /*volatile*/,
-                                        false /*nontemporal*/, 1 /* align=1 */);
+                                        false /*nontemporal*/, 
+                                        false /*isinvariant*/, 1 /* align=1 */);
  
    if (!ConstantMemory)
      Builder.PendingLoads.push_back(LoadVal.getValue(1));
@@ -5047,7 +5404,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
    if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) {
      bool ActuallyDoIt = true;
      MVT LoadVT;
-    const Type *LoadTy;
+    Type *LoadTy;
      switch (Size->getZExtValue()) {
      default:
        LoadVT = MVT::Other;
@@ -5115,14 +5472,14 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
  
    // See if any floating point values are being passed to this function. This is
    // used to emit an undefined reference to fltused on Windows.
-  const FunctionType *FT =
+  FunctionType *FT =
      cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0));
    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
    if (FT->isVarArg() &&
        !MMI.callsExternalVAFunctionWithFloatingPointArguments()) {
      for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
-      const Type* T = I.getArgOperand(i)->getType();
-      for (po_iterator<const Type*> i = po_begin(T), e = po_end(T);
+      Type* T = I.getArgOperand(i)->getType();
+      for (po_iterator<Type*> i = po_begin(T), e = po_end(T);
             i != e; ++i) {
          if (!i->isFloatingPointTy()) continue;
          MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true);
@@ -5266,20 +5623,20 @@ public:
      if (isa<BasicBlock>(CallOperandVal))
        return TLI.getPointerTy();
  
-    const llvm::Type *OpTy = CallOperandVal->getType();
+    llvm::Type *OpTy = CallOperandVal->getType();
  
      // FIXME: code duplicated from TargetLowering::ParseConstraints().
      // If this is an indirect operand, the operand is a pointer to the
      // accessed type.
      if (isIndirect) {
-      const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+      llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
        if (!PtrTy)
          report_fatal_error("Indirect operand for inline asm not a pointer!");
        OpTy = PtrTy->getElementType();
      }
  
      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
-    if (const StructType *STy = dyn_cast<StructType>(OpTy))
+    if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);
  
@@ -5320,54 +5677,6 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
  
  } // end anonymous namespace
  
-/// isAllocatableRegister - If the specified register is safe to allocate,
-/// i.e. it isn't a stack pointer or some other special register, return the
-/// register class for the register.  Otherwise, return null.
-static const TargetRegisterClass *
-isAllocatableRegister(unsigned Reg, MachineFunction &MF,
-                      const TargetLowering &TLI,
-                      const TargetRegisterInfo *TRI) {
-  EVT FoundVT = MVT::Other;
-  const TargetRegisterClass *FoundRC = 0;
-  for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
-       E = TRI->regclass_end(); RCI != E; ++RCI) {
-    EVT ThisVT = MVT::Other;
-
-    const TargetRegisterClass *RC = *RCI;
-    // If none of the value types for this register class are valid, we
-    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
-    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
-         I != E; ++I) {
-      if (TLI.isTypeLegal(*I)) {
-        // If we have already found this register in a different register class,
-        // choose the one with the largest VT specified.  For example, on
-        // PowerPC, we favor f64 register classes over f32.
-        if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) {
-          ThisVT = *I;
-          break;
-        }
-      }
-    }
-
-    if (ThisVT == MVT::Other) continue;
-
-    // NOTE: This isn't ideal.  In particular, this might allocate the
-    // frame pointer in functions that need it (due to them not being taken
-    // out of allocation, because a variable sized allocation hasn't been seen
-    // yet).  This is a slight code pessimization, but should still work.
-    for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
-         E = RC->allocation_order_end(MF); I != E; ++I)
-      if (*I == Reg) {
-        // We found a matching register class.  Keep looking at others in case
-        // we find one with larger registers that this physreg is also in.
-        FoundRC = RC;
-        FoundVT = ThisVT;
-        break;
-      }
-  }
-  return FoundRC;
-}
-
  /// GetRegistersForValue - Assign registers (virtual or physical) for the
  /// specified operand.  We prefer to assign virtual registers, to allow the
  /// register allocator to handle the assignment process.  However, if the asm
@@ -5502,52 +5811,6 @@ static void GetRegistersForValue(SelectionDAG &DAG,
      return;
    }
  
-  // This is a reference to a register class that doesn't directly correspond
-  // to an LLVM register class.  Allocate NumRegs consecutive, available,
-  // registers from the class.
-  std::vector<unsigned> RegClassRegs
-    = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode,
-                                            OpInfo.ConstraintVT);
-
-  const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
-  unsigned NumAllocated = 0;
-  for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
-    unsigned Reg = RegClassRegs[i];
-    // See if this register is available.
-    if ((isOutReg && OutputRegs.count(Reg)) ||   // Already used.
-        (isInReg  && InputRegs.count(Reg))) {    // Already used.
-      // Make sure we find consecutive registers.
-      NumAllocated = 0;
-      continue;
-    }
-
-    // Check to see if this register is allocatable (i.e. don't give out the
-    // stack pointer).
-    const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI);
-    if (!RC) {        // Couldn't allocate this register.
-      // Reset NumAllocated to make sure we return consecutive registers.
-      NumAllocated = 0;
-      continue;
-    }
-
-    // Okay, this register is good, we can use it.
-    ++NumAllocated;
-
-    // If we allocated enough consecutive registers, succeed.
-    if (NumAllocated == NumRegs) {
-      unsigned RegStart = (i-NumAllocated)+1;
-      unsigned RegEnd   = i+1;
-      // Mark all of the allocated registers used.
-      for (unsigned i = RegStart; i != RegEnd; ++i)
-        Regs.push_back(RegClassRegs[i]);
-
-      OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(),
-                                         OpInfo.ConstraintVT);
-      OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
-      return;
-    }
-  }
-
    // Otherwise, we couldn't allocate enough registers for this.
  }
  
@@ -5585,9 +5848,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
  
        // The return value of the call is this value.  As such, there is no
        // corresponding argument.
-      assert(!CS.getType()->isVoidTy() &&
-             "Bad inline asm!");
-      if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
+      assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+      if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
          OpVT = TLI.getValueType(STy->getElementType(ResNo));
        } else {
          assert(ResNo == 0 && "Asm only has one result!");
@@ -5654,10 +5916,15 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
        SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
  
        if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+       std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+         TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+                                           OpInfo.ConstraintVT);
+       std::pair<unsigned, const TargetRegisterClass*> InputRC =
+         TLI.getRegForInlineAsmConstraint(Input.ConstraintCode,
+                                           Input.ConstraintVT);
          if ((OpInfo.ConstraintVT.isInteger() !=
               Input.ConstraintVT.isInteger()) ||
-            (OpInfo.ConstraintVT.getSizeInBits() !=
-             Input.ConstraintVT.getSizeInBits())) {
+            (MatchRC.second != InputRC.second)) {
            report_fatal_error("Unsupported asm: input constraint"
                               " with a matching output constraint of"
                               " incompatible type!");
@@ -5680,6 +5947,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
        // Memory operands really want the address of the value.  If we don't have
        // an indirect input, put it in the constpool if we can, otherwise spill
        // it to a stack slot.
+      // TODO: This isn't quite right. We need to handle these according to
+      // the addressing mode that the constraint wants. Also, this may take
+      // an additional register for the computation and we don't want that
+      // either.
  
        // If the operand is a float, integer, or vector constant, spill to a
        // constant pool entry to get its address.
@@ -5691,7 +5962,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
        } else {
          // Otherwise, create a stack slot and emit a store to it before the
          // asm.
-        const Type *Ty = OpVal->getType();
+        Type *Ty = OpVal->getType();
          uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
          unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
          MachineFunction &MF = DAG.getMachineFunction();
@@ -5881,7 +6152,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
  
        if (OpInfo.ConstraintType == TargetLowering::C_Other) {
          std::vector<SDValue> Ops;
-        TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
+        TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
                                           Ops, DAG);
          if (Ops.empty())
            report_fatal_error("Invalid operand for inline asm constraint '" +
@@ -5916,8 +6187,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
               "Don't know how to handle indirect register inputs yet!");
  
        // Copy the input into the appropriate registers.
-      if (OpInfo.AssignedRegs.Regs.empty() ||
-          !OpInfo.AssignedRegs.areValueTypesLegal(TLI))
+      if (OpInfo.AssignedRegs.Regs.empty())
          report_fatal_error("Couldn't allocate input reg for constraint '" +
                             Twine(OpInfo.ConstraintCode) + "'!");
  
@@ -5932,8 +6202,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
        // Add the clobbered value to the operand list, so that the register
        // allocator is aware that the physreg got clobbered.
        if (!OpInfo.AssignedRegs.Regs.empty())
-        OpInfo.AssignedRegs.AddInlineAsmOperands(
-                                            InlineAsm::Kind_RegDefEarlyClobber,
+        OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
                                                   false, 0, DAG,
                                                   AsmNodeOperands);
        break;
@@ -6054,7 +6323,7 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
  /// FIXME: When all targets are
  /// migrated to using LowerCall, this hook should be integrated into SDISel.
  std::pair<SDValue, SDValue>
-TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
+TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
                              bool RetSExt, bool RetZExt, bool isVarArg,
                              bool isInreg, unsigned NumFixedArgs,
                              CallingConv::ID CallConv, bool isTailCall,
@@ -6071,7 +6340,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
      for (unsigned Value = 0, NumValues = ValueVTs.size();
           Value != NumValues; ++Value) {
        EVT VT = ValueVTs[Value];
-      const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
+      Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
        SDValue Op = SDValue(Args[i].Node.getNode(),
                             Args[i].Node.getResNo() + Value);
        ISD::ArgFlagsTy Flags;
@@ -6088,16 +6357,17 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
          Flags.setSRet();
        if (Args[i].isByVal) {
          Flags.setByVal();
-        const PointerType *Ty = cast<PointerType>(Args[i].Ty);
-        const Type *ElementTy = Ty->getElementType();
-        unsigned FrameAlign = getByValTypeAlignment(ElementTy);
-        unsigned FrameSize  = getTargetData()->getTypeAllocSize(ElementTy);
+        PointerType *Ty = cast<PointerType>(Args[i].Ty);
+        Type *ElementTy = Ty->getElementType();
+        Flags.setByValSize(getTargetData()->getTypeAllocSize(ElementTy));
          // For ByVal, alignment should come from FE.  BE will guess if this
          // info is not there but there are cases it cannot get right.
+        unsigned FrameAlign;
          if (Args[i].Alignment)
            FrameAlign = Args[i].Alignment;
+        else
+          FrameAlign = getByValTypeAlignment(ElementTy);
          Flags.setByValAlign(FrameAlign);
-        Flags.setByValSize(FrameSize);
        }
        if (Args[i].isNest)
          Flags.setNest();
@@ -6245,10 +6515,10 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
  /// isOnlyUsedInEntryBlock - If the specified argument is only used in the
  /// entry block, return true.  This includes arguments used by switches, since
  /// the switch may expand into multiple basic blocks.
-static bool isOnlyUsedInEntryBlock(const Argument *A) {
+static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
    // With FastISel active, we may be splitting blocks, so force creation
    // of virtual registers for all non-dead arguments.
-  if (EnableFastISel)
+  if (FastISel)
      return A->use_empty();
  
    const BasicBlock *Entry = A->getParent()->begin();
@@ -6298,7 +6568,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
      for (unsigned Value = 0, NumValues = ValueVTs.size();
           Value != NumValues; ++Value) {
        EVT VT = ValueVTs[Value];
-      const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
+      Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
        ISD::ArgFlagsTy Flags;
        unsigned OriginalAlignment =
          TD->getABITypeAlignment(ArgTy);
@@ -6313,16 +6583,17 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
          Flags.setSRet();
        if (F.paramHasAttr(Idx, Attribute::ByVal)) {
          Flags.setByVal();
-        const PointerType *Ty = cast<PointerType>(I->getType());
-        const Type *ElementTy = Ty->getElementType();
-        unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy);
-        unsigned FrameSize  = TD->getTypeAllocSize(ElementTy);
+        PointerType *Ty = cast<PointerType>(I->getType());
+        Type *ElementTy = Ty->getElementType();
+        Flags.setByValSize(TD->getTypeAllocSize(ElementTy));
          // For ByVal, alignment should be passed from FE.  BE will guess if
          // this info is not there but there are cases it cannot get right.
+        unsigned FrameAlign;
          if (F.getParamAlignment(Idx))
            FrameAlign = F.getParamAlignment(Idx);
+        else
+          FrameAlign = TLI.getByValTypeAlignment(ElementTy);
          Flags.setByValAlign(FrameAlign);
-        Flags.setByValSize(FrameSize);
        }
        if (F.paramHasAttr(Idx, Attribute::Nest))
          Flags.setNest();
@@ -6428,29 +6699,37 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
      if (ArgValues.empty())
        continue;
  
-    // Note down frame index for byval arguments.
-    if (I->hasByValAttr())
-      if (FrameIndexSDNode *FI =
-          dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
-        FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex());
+    // Note down frame index.
+    if (FrameIndexSDNode *FI =
+       dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
+      FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
  
      SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues,
                                       SDB->getCurDebugLoc());
+
      SDB->setValue(I, Res);
+    if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
+      if (LoadSDNode *LNode = 
+          dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
+        if (FrameIndexSDNode *FI =
+            dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
+        FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
+    }
  
      // If this argument is live outside of the entry block, insert a copy from
      // wherever we got it to the vreg that other BB's will reference it as.
-    if (Res.getOpcode() == ISD::CopyFromReg) {
+    if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
        // If we can, though, try to skip creating an unnecessary vreg.
        // FIXME: This isn't very clean... it would be nice to make this more
-      // general.
+      // general.  It's also subtly incompatible with the hacks FastISel
+      // uses with vregs.
        unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
        if (TargetRegisterInfo::isVirtualRegister(Reg)) {
          FuncInfo->ValueMap[I] = Reg;
          continue;
        }
      }
-    if (!isOnlyUsedInEntryBlock(I)) {
+    if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) {
        FuncInfo->InitializeRegForValue(I);
        SDB->CopyToExportRegsIfNeeded(I);
      }
@@ -6497,6 +6776,10 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
        // Ignore dead phi's.
        if (PN->use_empty()) continue;
  
+      // Skip empty types
+      if (PN->getType()->isEmptyTy())
+        continue;
+
        unsigned Reg;
        const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);