Fix more code to work properly with vector operands. Based on

[oota-llvm.git] / lib / CodeGen / SelectionDAG / DAGCombiner.cpp
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 3bc9cf06adb04c2641020dff200e9424af0702db..3be6b431167a759418f29b8cd3c9639a99800989 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19,6 +19,7 @@
  #define DEBUG_TYPE "dagcombine"
  #include "llvm/CodeGen/SelectionDAG.h"
  #include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineFrameInfo.h"
  #include "llvm/CodeGen/PseudoSourceValue.h"
@@ -30,12 +31,12 @@
  #include "llvm/Target/TargetOptions.h"
  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/ADT/Statistic.h"
-#include "llvm/Support/Compiler.h"
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
  #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
  #include <algorithm>
-#include <set>
  using namespace llvm;
  
  STATISTIC(NodesCombined   , "Number of dag nodes combined");
@@ -54,7 +55,7 @@ namespace {
  
  //------------------------------ DAGCombiner ---------------------------------//
  
-  class VISIBILITY_HIDDEN DAGCombiner {
+  class DAGCombiner {
      SelectionDAG &DAG;
      const TargetLowering &TLI;
      CombineLevel Level;
@@ -118,7 +119,8 @@ namespace {
      /// it can be simplified or if things it uses can be simplified by bit
      /// propagation.  If so, return true.
      bool SimplifyDemandedBits(SDValue Op) {
-      APInt Demanded = APInt::getAllOnesValue(Op.getValueSizeInBits());
+      unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+      APInt Demanded = APInt::getAllOnesValue(BitWidth);
        return SimplifyDemandedBits(Op, Demanded);
      }
  
@@ -213,12 +215,12 @@ namespace {
      SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2,
                               SDValue N3, ISD::CondCode CC,
                               bool NotExtCompare = false);
-    SDValue SimplifySetCC(MVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
+    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                            DebugLoc DL, bool foldBooleans = true);
      SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                           unsigned HiOp);
-    SDValue CombineConsecutiveLoads(SDNode *N, MVT VT);
-    SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, MVT);
+    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
+    SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT);
      SDValue BuildSDIV(SDNode *N);
      SDValue BuildUDIV(SDNode *N);
      SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
@@ -236,14 +238,17 @@ namespace {
      /// overlap.
      bool isAlias(SDValue Ptr1, int64_t Size1,
                   const Value *SrcValue1, int SrcValueOffset1,
+                 unsigned SrcValueAlign1,
                   SDValue Ptr2, int64_t Size2,
-                 const Value *SrcValue2, int SrcValueOffset2) const;
+                 const Value *SrcValue2, int SrcValueOffset2,
+                 unsigned SrcValueAlign2) const;
  
      /// FindAliasInfo - Extracts the relevant alias information from the memory
      /// node.  Returns true if the operand was a load.
      bool FindAliasInfo(SDNode *N,
                         SDValue &Ptr, int64_t &Size,
-                       const Value *&SrcValue, int &SrcValueOffset) const;
+                       const Value *&SrcValue, int &SrcValueOffset,
+                       unsigned &SrcValueAlignment) const;
  
      /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
      /// looking for a better chain (aliasing node.)
@@ -251,7 +256,7 @@ namespace {
  
      /// getShiftAmountTy - Returns a type large enough to hold any valid
      /// shift amount - before type legalization these can be huge.
-    MVT getShiftAmountTy() {
+    EVT getShiftAmountTy() {
        return LegalTypes ?  TLI.getShiftAmountTy() : TLI.getPointerTy();
      }
  
@@ -274,8 +279,7 @@ public:
  namespace {
  /// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
  /// nodes from the worklist.
-class VISIBILITY_HIDDEN WorkListRemover :
-  public SelectionDAG::DAGUpdateListener {
+class WorkListRemover : public SelectionDAG::DAGUpdateListener {
    DAGCombiner &DC;
  public:
    explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {}
@@ -392,7 +396,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
  
    assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
    switch (Op.getOpcode()) {
-  default: assert(0 && "Unknown code");
+  default: llvm_unreachable("Unknown code");
    case ISD::ConstantFP: {
      APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
      V.changeSign();
@@ -495,7 +499,7 @@ static bool isOneUseSetCC(SDValue N) {
  
  SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL,
                                      SDValue N0, SDValue N1) {
-  MVT VT = N0.getValueType();
+  EVT VT = N0.getValueType();
    if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
      if (isa<ConstantSDNode>(N1)) {
        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
@@ -537,11 +541,14 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                                 bool AddTo) {
    assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
    ++NodesCombined;
-  DOUT << "\nReplacing.1 "; DEBUG(N->dump(&DAG));
-  DOUT << "\nWith: "; DEBUG(To[0].getNode()->dump(&DAG));
-  DOUT << " and " << NumTo-1 << " other values\n";
-  DEBUG(for (unsigned i = 0, e = NumTo; i != e; ++i)
-          assert(N->getValueType(i) == To[i].getValueType() &&
+  DEBUG(dbgs() << "\nReplacing.1 ";
+        N->dump(&DAG);
+        dbgs() << "\nWith: ";
+        To[0].getNode()->dump(&DAG);
+        dbgs() << " and " << NumTo-1 << " other values\n";
+        for (unsigned i = 0, e = NumTo; i != e; ++i)
+          assert((!To[i].getNode() ||
+                  N->getValueType(i) == To[i].getValueType()) &&
                   "Cannot combine value to value of different type!"));
    WorkListRemover DeadNodes(*this);
    DAG.ReplaceAllUsesWith(N, To, &DeadNodes);
@@ -612,9 +619,11 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
  
    // Replace the old value with the new one.
    ++NodesCombined;
-  DOUT << "\nReplacing.2 "; DEBUG(TLO.Old.getNode()->dump(&DAG));
-  DOUT << "\nWith: "; DEBUG(TLO.New.getNode()->dump(&DAG));
-  DOUT << '\n';
+  DEBUG(dbgs() << "\nReplacing.2 "; 
+        TLO.Old.getNode()->dump(&DAG);
+        dbgs() << "\nWith: ";
+        TLO.New.getNode()->dump(&DAG);
+        dbgs() << '\n');
  
    CommitTargetLoweringOpt(TLO);
    return true;
@@ -680,9 +689,11 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
             RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
             "Node was deleted but visit returned new node!");
  
-    DOUT << "\nReplacing.3 "; DEBUG(N->dump(&DAG));
-    DOUT << "\nWith: "; DEBUG(RV.getNode()->dump(&DAG));
-    DOUT << '\n';
+    DEBUG(dbgs() << "\nReplacing.3 "; 
+          N->dump(&DAG);
+          dbgs() << "\nWith: ";
+          RV.getNode()->dump(&DAG);
+          dbgs() << '\n');
      WorkListRemover DeadNodes(*this);
      if (N->getNumValues() == RV.getNode()->getNumValues())
        DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes);
@@ -800,7 +811,7 @@ SDValue DAGCombiner::combine(SDNode *N) {
  
        // Expose the DAG combiner to the target combiner impls.
        TargetLowering::DAGCombinerInfo
-        DagCombineInfo(DAG, Level == Unrestricted, false, this);
+        DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
  
        RV = TLI.PerformDAGCombine(N, DagCombineInfo);
      }
@@ -877,7 +888,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
          break;
  
        case ISD::TokenFactor:
-        if ((CombinerAA || Op.hasOneUse()) &&
+        if (Op.hasOneUse() &&
              std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
            // Queue up for processing.
            TFs.push_back(Op.getNode());
@@ -898,7 +909,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
        }
      }
    }
-
+  
    SDValue Result;
  
    // If we've change things around then replace token factor.
@@ -922,9 +933,14 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  /// MERGE_VALUES can always be eliminated.
  SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
    WorkListRemover DeadNodes(*this);
-  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
-    DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i),
-                                  &DeadNodes);
+  // Replacing results may cause a different MERGE_VALUES to suddenly
+  // be CSE'd with N, and carry its uses with it. Iterate until no
+  // uses remain, to ensure that the node can be safely deleted.
+  do {
+    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i),
+                                    &DeadNodes);
+  } while (!N->use_empty());
    removeFromWorkList(N);
    DAG.DeleteNode(N);
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
@@ -933,7 +949,7 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  static
  SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
                                SelectionDAG &DAG) {
-  MVT VT = N0.getValueType();
+  EVT VT = N0.getValueType();
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
@@ -957,7 +973,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
    SDValue N1 = N->getOperand(1);
    ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
-  MVT VT = N0.getValueType();
+  EVT VT = N0.getValueType();
  
    // fold vector ops
    if (VT.isVector()) {
@@ -1048,7 +1064,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
    if (VT.isInteger() && !VT.isVector()) {
      APInt LHSZero, LHSOne;
      APInt RHSZero, RHSOne;
-    APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+    APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
      DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
  
      if (LHSZero.getBoolValue()) {
@@ -1072,6 +1088,26 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
      if (Result.getNode()) return Result;
    }
  
+  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
+  if (N1.getOpcode() == ISD::SHL &&
+      N1.getOperand(0).getOpcode() == ISD::SUB)
+    if (ConstantSDNode *C =
+          dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
+      if (C->getAPIntValue() == 0)
+        return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0,
+                           DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+                                       N1.getOperand(0).getOperand(1),
+                                       N1.getOperand(1)));
+  if (N0.getOpcode() == ISD::SHL &&
+      N0.getOperand(0).getOpcode() == ISD::SUB)
+    if (ConstantSDNode *C =
+          dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
+      if (C->getAPIntValue() == 0)
+        return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1,
+                           DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+                                       N0.getOperand(0).getOperand(1),
+                                       N0.getOperand(1)));
+
    return SDValue();
  }
  
@@ -1080,7 +1116,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
    SDValue N1 = N->getOperand(1);
    ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
-  MVT VT = N0.getValueType();
+  EVT VT = N0.getValueType();
  
    // If the flag result is dead, turn this into an ADD.
    if (N->hasNUsesOfValue(0, 1))
@@ -1100,7 +1136,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
    // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
    APInt LHSZero, LHSOne;
    APInt RHSZero, RHSOne;
-  APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+  APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
    DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
  
    if (LHSZero.getBoolValue()) {
@@ -1142,7 +1178,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
    SDValue N1 = N->getOperand(1);
    ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
-  MVT VT = N0.getValueType();
+  EVT VT = N0.getValueType();
  
    // fold vector ops
    if (VT.isVector()) {
@@ -1160,6 +1196,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
    if (N1C)
      return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0,
                         DAG.getConstant(-N1C->getAPIntValue(), VT));
+  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
+  if (N0C && N0C->isAllOnesValue())
+    return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
    // fold (A+B)-A -> B
    if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
      return N0.getOperand(1);
@@ -1215,7 +1254,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
    SDValue N1 = N->getOperand(1);
    ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
-  MVT VT = N0.getValueType();
+  EVT VT = N0.getValueType();
  
    // fold vector ops
    if (VT.isVector()) {
@@ -1308,7 +1347,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
    SDValue N1 = N->getOperand(1);
    ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // fold vector ops
    if (VT.isVector()) {
@@ -1395,7 +1434,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
    SDValue N1 = N->getOperand(1);
    ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // fold vector ops
    if (VT.isVector()) {
@@ -1415,7 +1454,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
    if (N1.getOpcode() == ISD::SHL) {
      if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
        if (SHC->getAPIntValue().isPowerOf2()) {
-        MVT ADDVT = N1.getOperand(1).getValueType();
+        EVT ADDVT = N1.getOperand(1).getValueType();
          SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT,
                                    N1.getOperand(1),
                                    DAG.getConstant(SHC->getAPIntValue()
@@ -1447,7 +1486,7 @@ SDValue DAGCombiner::visitSREM(SDNode *N) {
    SDValue N1 = N->getOperand(1);
    ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // fold (srem c1, c2) -> c1%c2
    if (N0C && N1C && !N1C->isNullValue())
@@ -1489,7 +1528,7 @@ SDValue DAGCombiner::visitUREM(SDNode *N) {
    SDValue N1 = N->getOperand(1);
    ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // fold (urem c1, c2) -> c1%c2
    if (N0C && N1C && !N1C->isNullValue())
@@ -1541,7 +1580,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // fold (mulhs x, 0) -> 0
    if (N1C && N1C->isNullValue())
@@ -1562,7 +1601,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // fold (mulhu x, 0) -> 0
    if (N1C && N1C->isNullValue())
@@ -1665,19 +1704,28 @@ SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
  /// two operands of the same opcode, try to simplify it.
  SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
    SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
-  MVT VT = N0.getValueType();
+  EVT VT = N0.getValueType();
    assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
  
+  // Bail early if none of these transforms apply.
+  if (N0.getNode()->getNumOperands() == 0) return SDValue();
+
    // For each of OP in AND/OR/XOR:
    // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
    // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
    // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
-  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
-  if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND||
+  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y))
+  //
+  // do not sink logical op inside of a vector extend, since it may combine
+  // into a vsetcc.
+  EVT Op0VT = N0.getOperand(0).getValueType();
+  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+       N0.getOpcode() == ISD::ANY_EXTEND  ||
         N0.getOpcode() == ISD::SIGN_EXTEND ||
-       (N0.getOpcode() == ISD::TRUNCATE &&
-        !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) &&
-      N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) {
+       (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) &&
+      !VT.isVector() &&
+      Op0VT == N1.getOperand(0).getValueType() &&
+      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
      SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
                                   N0.getOperand(0).getValueType(),
                                   N0.getOperand(0), N1.getOperand(0));
@@ -1709,8 +1757,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
    SDValue LL, LR, RL, RR, CC0, CC1;
    ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
-  MVT VT = N1.getValueType();
-  unsigned BitWidth = VT.getSizeInBits();
+  EVT VT = N1.getValueType();
+  unsigned BitWidth = VT.getScalarType().getSizeInBits();
  
    // fold vector ops
    if (VT.isVector()) {
@@ -1738,7 +1786,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
    SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1);
    if (RAND.getNode() != 0)
      return RAND;
-  // fold (and (or x, 0xFFFF), 0xFF) -> 0xFF
+  // fold (and (or x, C), D) -> D if (C & D) == D
    if (N1C && N0.getOpcode() == ISD::OR)
      if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
        if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
@@ -1817,22 +1865,24 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
    if (!VT.isVector() &&
        SimplifyDemandedBits(SDValue(N, 0)))
      return SDValue(N, 0);
+
    // fold (zext_inreg (extload x)) -> (zextload x)
    if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-    MVT EVT = LN0->getMemoryVT();
+    EVT MemVT = LN0->getMemoryVT();
      // If we zero all the possible extended bits, then we can turn this into
      // a zextload if we are running before legalize or the operation is legal.
-    unsigned BitWidth = N1.getValueSizeInBits();
+    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
      if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
-                                     BitWidth - EVT.getSizeInBits())) &&
+                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
          ((!LegalOperations && !LN0->isVolatile()) ||
-         TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) {
+         TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getSrcValue(),
-                                       LN0->getSrcValueOffset(), EVT,
-                                       LN0->isVolatile(), LN0->getAlignment());
+                                       LN0->getSrcValueOffset(), MemVT,
+                                       LN0->isVolatile(), LN0->isNonTemporal(),
+                                       LN0->getAlignment());
        AddToWorkList(N);
        CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
@@ -1842,19 +1892,20 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
    if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
        N0.hasOneUse()) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-    MVT EVT = LN0->getMemoryVT();
+    EVT MemVT = LN0->getMemoryVT();
      // If we zero all the possible extended bits, then we can turn this into
      // a zextload if we are running before legalize or the operation is legal.
-    unsigned BitWidth = N1.getValueSizeInBits();
+    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
      if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
-                                     BitWidth - EVT.getSizeInBits())) &&
+                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
          ((!LegalOperations && !LN0->isVolatile()) ||
-         TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) {
+         TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
                                         LN0->getChain(),
                                         LN0->getBasePtr(), LN0->getSrcValue(),
-                                       LN0->getSrcValueOffset(), EVT,
-                                       LN0->isVolatile(), LN0->getAlignment());
+                                       LN0->getSrcValueOffset(), MemVT,
+                                       LN0->isVolatile(), LN0->isNonTemporal(),
+                                       LN0->getAlignment());
        AddToWorkList(N);
        CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
@@ -1863,48 +1914,71 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
  
    // fold (and (load x), 255) -> (zextload x, i8)
    // fold (and (extload x, i16), 255) -> (zextload x, i8)
-  if (N1C && N0.getOpcode() == ISD::LOAD) {
-    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
+  if (N1C && (N0.getOpcode() == ISD::LOAD ||
+              (N0.getOpcode() == ISD::ANY_EXTEND &&
+               N0.getOperand(0).getOpcode() == ISD::LOAD))) {
+    bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
+    LoadSDNode *LN0 = HasAnyExt
+      ? cast<LoadSDNode>(N0.getOperand(0))
+      : cast<LoadSDNode>(N0);
      if (LN0->getExtensionType() != ISD::SEXTLOAD &&
-        LN0->isUnindexed() && N0.hasOneUse() &&
-        // Do not change the width of a volatile load.
-        !LN0->isVolatile()) {
-      MVT EVT = MVT::Other;
+        LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) {
        uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
-      if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue()))
-        EVT = MVT::getIntegerVT(ActiveBits);
-
-      MVT LoadedVT = LN0->getMemoryVT();
-
-      // Do not generate loads of non-round integer types since these can
-      // be expensive (and would be wrong if the type is not byte sized).
-      if (EVT != MVT::Other && LoadedVT.bitsGT(EVT) && EVT.isRound() &&
-          (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT))) {
-        MVT PtrType = N0.getOperand(1).getValueType();
-
-        // For big endian targets, we need to add an offset to the pointer to
-        // load the correct bytes.  For little endian systems, we merely need to
-        // read fewer bytes from the same pointer.
-        unsigned LVTStoreBytes = LoadedVT.getStoreSizeInBits()/8;
-        unsigned EVTStoreBytes = EVT.getStoreSizeInBits()/8;
-        unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
-        unsigned Alignment = LN0->getAlignment();
-        SDValue NewPtr = LN0->getBasePtr();
-
-        if (TLI.isBigEndian()) {
-          NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
-                               NewPtr, DAG.getConstant(PtrOff, PtrType));
-          Alignment = MinAlign(Alignment, PtrOff);
+      if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
+        EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+        EVT LoadedVT = LN0->getMemoryVT();
+
+        if (ExtVT == LoadedVT &&
+            (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+          EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+          
+          SDValue NewLoad = 
+            DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+                           LN0->getChain(), LN0->getBasePtr(),
+                           LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                           ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+                           LN0->getAlignment());
+          AddToWorkList(N);
+          CombineTo(LN0, NewLoad, NewLoad.getValue(1));
+          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
          }
+        
+        // Do not change the width of a volatile load.
+        // Do not generate loads of non-round integer types since these can
+        // be expensive (and would be wrong if the type is not byte sized).
+        if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
+            (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+          EVT PtrType = LN0->getOperand(1).getValueType();
+
+          unsigned Alignment = LN0->getAlignment();
+          SDValue NewPtr = LN0->getBasePtr();
+
+          // For big endian targets, we need to add an offset to the pointer
+          // to load the correct bytes.  For little endian systems, we merely
+          // need to read fewer bytes from the same pointer.
+          if (TLI.isBigEndian()) {
+            unsigned LVTStoreBytes = LoadedVT.getStoreSize();
+            unsigned EVTStoreBytes = ExtVT.getStoreSize();
+            unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
+            NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
+                                 NewPtr, DAG.getConstant(PtrOff, PtrType));
+            Alignment = MinAlign(Alignment, PtrOff);
+          }
  
-        AddToWorkList(NewPtr.getNode());
-        SDValue Load =
-          DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, LN0->getChain(),
-                         NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(),
-                         EVT, LN0->isVolatile(), Alignment);
-        AddToWorkList(N);
-        CombineTo(N0.getNode(), Load, Load.getValue(1));
-        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+          AddToWorkList(NewPtr.getNode());
+          
+          EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+          SDValue Load =
+            DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+                           LN0->getChain(), NewPtr,
+                           LN0->getSrcValue(), LN0->getSrcValueOffset(),
+                           ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+                           Alignment);
+          AddToWorkList(N);
+          CombineTo(LN0, Load, Load.getValue(1));
+          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+        }
        }
      }
    }
@@ -1918,7 +1992,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
    SDValue LL, LR, RL, RR, CC0, CC1;
    ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
-  MVT VT = N1.getValueType();
+  EVT VT = N1.getValueType();
  
    // fold vector ops
    if (VT.isVector()) {
@@ -1927,8 +2001,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
    }
  
    // fold (or x, undef) -> -1
-  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
-    return DAG.getConstant(~0ULL, VT);
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) {
+    EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+    return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+  }
    // fold (or c1, c2) -> c1|c2
    if (N0C && N1C)
      return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
@@ -1949,13 +2025,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
    if (ROR.getNode() != 0)
      return ROR;
    // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+  // iff (c1 & c2) == 0.
    if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
               isa<ConstantSDNode>(N0.getOperand(1))) {
      ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
-    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
-                       DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
-                                   N0.getOperand(0), N1),
-                       DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
+    if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
+      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+                         DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+                                     N0.getOperand(0), N1),
+                         DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
    }
    // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
    if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
@@ -2058,7 +2136,7 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
  // a rot[lr].
  SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
    // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
-  MVT VT = LHS.getValueType();
+  EVT VT = LHS.getValueType();
    if (!TLI.isTypeLegal(VT)) return 0;
  
    // The target must have at least one rotate flavor.
@@ -2219,7 +2297,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
    SDValue LHS, RHS, CC;
    ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
-  MVT VT = N0.getValueType();
+  EVT VT = N0.getValueType();
  
    // fold vector ops
    if (VT.isVector()) {
@@ -2258,8 +2336,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
      if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) {
        switch (N0.getOpcode()) {
        default:
-        assert(0 && "Unhandled SetCC Equivalent!");
-        abort();
+        llvm_unreachable("Unhandled SetCC Equivalent!");
        case ISD::SETCC:
          return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC);
        case ISD::SELECT_CC:
@@ -2388,7 +2465,7 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
        !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
      return SDValue();
  
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // If this is a signed shift right, and the high bit is modified by the
    // logical operation, do not perform the transformation. The highBitSet
@@ -2418,8 +2495,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
    SDValue N1 = N->getOperand(1);
    ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
-  MVT VT = N0.getValueType();
-  unsigned OpSizeInBits = VT.getSizeInBits();
+  EVT VT = N0.getValueType();
+  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
  
    // fold (shl c1, c2) -> c1<<c2
    if (N0C && N1C)
@@ -2435,7 +2512,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
      return N0;
    // if (shl x, c) is known to be zero, return 0
    if (DAG.MaskedValueIsZero(SDValue(N, 0),
-                            APInt::getAllOnesValue(VT.getSizeInBits())))
+                            APInt::getAllOnesValue(OpSizeInBits)))
      return DAG.getConstant(0, VT);
    // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
    if (N1.getOpcode() == ISD::TRUNCATE &&
@@ -2443,7 +2520,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
        N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
      SDValue N101 = N1.getOperand(0).getOperand(1);
      if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
-      MVT TruncVT = N1.getValueType();
+      EVT TruncVT = N1.getValueType();
        SDValue N100 = N1.getOperand(0).getOperand(0);
        APInt TruncC = N101C->getAPIntValue();
        TruncC.trunc(TruncVT.getSizeInBits());
@@ -2474,20 +2551,33 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
    if (N1C && N0.getOpcode() == ISD::SRL &&
        N0.getOperand(1).getOpcode() == ISD::Constant) {
      uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
-    uint64_t c2 = N1C->getZExtValue();
-    SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, N0.getOperand(0),
-                               DAG.getConstant(~0ULL << c1, VT));
-    if (c2 > c1)
-      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask,
-                         DAG.getConstant(c2-c1, N1.getValueType()));
-    else
-      return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask,
-                         DAG.getConstant(c1-c2, N1.getValueType()));
+    if (c1 < VT.getSizeInBits()) {
+      uint64_t c2 = N1C->getZExtValue();
+      SDValue HiBitsMask =
+        DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
+                                              VT.getSizeInBits() - c1),
+                        VT);
+      SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT,
+                                 N0.getOperand(0),
+                                 HiBitsMask);
+      if (c2 > c1)
+        return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask,
+                           DAG.getConstant(c2-c1, N1.getValueType()));
+      else
+        return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask,
+                           DAG.getConstant(c1-c2, N1.getValueType()));
+    }
    }
    // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
-  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1))
+  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
+    SDValue HiBitsMask =
+      DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
+                                            VT.getSizeInBits() -
+                                              N1C->getZExtValue()),
+                      VT);
      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
-                       DAG.getConstant(~0ULL << N1C->getZExtValue(), VT));
+                       HiBitsMask);
+  }
  
    return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
  }
@@ -2497,7 +2587,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
    SDValue N1 = N->getOperand(1);
    ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
-  MVT VT = N0.getValueType();
+  EVT VT = N0.getValueType();
+  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
  
    // fold (sra c1, c2) -> (sra c1, c2)
    if (N0C && N1C)
@@ -2509,7 +2600,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
    if (N0C && N0C->isAllOnesValue())
      return N0;
    // fold (sra x, (setge c, size(x))) -> undef
-  if (N1C && N1C->getZExtValue() >= VT.getSizeInBits())
+  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
      return DAG.getUNDEF(VT);
    // fold (sra x, 0) -> x
    if (N1C && N1C->isNullValue())
@@ -2517,18 +2608,22 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
    // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
    // sext_inreg.
    if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
-    unsigned LowBits = VT.getSizeInBits() - (unsigned)N1C->getZExtValue();
-    MVT EVT = MVT::getIntegerVT(LowBits);
-    if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT)))
+    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
+    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
+    if (VT.isVector())
+      ExtVT = EVT::getVectorVT(*DAG.getContext(),
+                               ExtVT, VT.getVectorNumElements());
+    if ((!LegalOperations ||
+         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
        return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
-                         N0.getOperand(0), DAG.getValueType(EVT));
+                         N0.getOperand(0), DAG.getValueType(ExtVT));
    }
  
    // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
    if (N1C && N0.getOpcode() == ISD::SRA) {
      if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
        unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
-      if (Sum >= VT.getSizeInBits()) Sum = VT.getSizeInBits()-1;
+      if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
        return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
                           DAG.getConstant(Sum, N1C->getValueType(0)));
      }
@@ -2544,15 +2639,14 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
      const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (N01C && N1C) {
        // Determine what the truncate's result bitsize and type would be.
-      unsigned VTValSize = VT.getSizeInBits();
-      MVT TruncVT =
-        MVT::getIntegerVT(VTValSize - N1C->getZExtValue());
+      EVT TruncVT =
+        EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue());
        // Determine the residual right-shift amount.
        signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
  
        // If the shift is not a no-op (in which case this should be just a sign
        // extend already), the truncated to type is legal, sign_extend is legal
-      // on that type, and the the truncate to that type is both legal and free,
+      // on that type, and the truncate to that type is both legal and free,
        // perform the transform.
        if ((ShiftAmt > 0) &&
            TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
@@ -2576,10 +2670,10 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
        N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
      SDValue N101 = N1.getOperand(0).getOperand(1);
      if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
-      MVT TruncVT = N1.getValueType();
+      EVT TruncVT = N1.getValueType();
        SDValue N100 = N1.getOperand(0).getOperand(0);
        APInt TruncC = N101C->getAPIntValue();
-      TruncC.trunc(TruncVT.getSizeInBits());
+      TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
        return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
                           DAG.getNode(ISD::AND, N->getDebugLoc(),
                                       TruncVT,
@@ -2607,8 +2701,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
    SDValue N1 = N->getOperand(1);
    ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
-  MVT VT = N0.getValueType();
-  unsigned OpSizeInBits = VT.getSizeInBits();
+  EVT VT = N0.getValueType();
+  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
  
    // fold (srl c1, c2) -> c1 >>u c2
    if (N0C && N1C)
@@ -2641,7 +2735,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
    // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
    if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
      // Shifting in all undef bits?
-    MVT SmallVT = N0.getOperand(0).getValueType();
+    EVT SmallVT = N0.getOperand(0).getValueType();
      if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
        return DAG.getUNDEF(VT);
  
@@ -2662,7 +2756,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
    if (N1C && N0.getOpcode() == ISD::CTLZ &&
        N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
      APInt KnownZero, KnownOne;
-    APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+    APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
      DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
  
      // If any of the input bits are KnownOne, then the input couldn't be all
@@ -2700,7 +2794,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
        N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
      SDValue N101 = N1.getOperand(0).getOperand(1);
      if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
-      MVT TruncVT = N1.getValueType();
+      EVT TruncVT = N1.getValueType();
        SDValue N100 = N1.getOperand(0).getOperand(0);
        APInt TruncC = N101C->getAPIntValue();
        TruncC.trunc(TruncVT.getSizeInBits());
@@ -2719,12 +2813,47 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
    if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
      return SDValue(N, 0);
  
-  return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
+  if (N1C) {
+    SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
+    if (NewSRL.getNode())
+      return NewSRL;
+  }
+
+  // Here is a common situation. We want to optimize:
+  //
+  //   %a = ...
+  //   %b = and i32 %a, 2
+  //   %c = srl i32 %b, 1
+  //   brcond i32 %c ...
+  //
+  // into
+  // 
+  //   %a = ...
+  //   %b = and %a, 2
+  //   %c = setcc eq %b, 0
+  //   brcond %c ...
+  //
+  // However when after the source operand of SRL is optimized into AND, the SRL
+  // itself may not be optimized further. Look for it and add the BRCOND into
+  // the worklist.
+  if (N->hasOneUse()) {
+    SDNode *Use = *N->use_begin();
+    if (Use->getOpcode() == ISD::BRCOND)
+      AddToWorkList(Use);
+    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
+      // Also look pass the truncate.
+      Use = *Use->use_begin();
+      if (Use->getOpcode() == ISD::BRCOND)
+        AddToWorkList(Use);
+    }
+  }
+
+  return SDValue();
  }
  
  SDValue DAGCombiner::visitCTLZ(SDNode *N) {
    SDValue N0 = N->getOperand(0);
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // fold (ctlz c1) -> c2
    if (isa<ConstantSDNode>(N0))
@@ -2734,7 +2863,7 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) {
  
  SDValue DAGCombiner::visitCTTZ(SDNode *N) {
    SDValue N0 = N->getOperand(0);
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // fold (cttz c1) -> c2
    if (isa<ConstantSDNode>(N0))
@@ -2744,7 +2873,7 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) {
  
  SDValue DAGCombiner::visitCTPOP(SDNode *N) {
    SDValue N0 = N->getOperand(0);
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // fold (ctpop c1) -> c2
    if (isa<ConstantSDNode>(N0))
@@ -2759,8 +2888,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
    ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
    ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
-  MVT VT = N->getValueType(0);
-  MVT VT0 = N0.getValueType();
+  EVT VT = N->getValueType(0);
+  EVT VT0 = N0.getValueType();
  
    // fold (select C, X, X) -> X
    if (N1 == N2)
@@ -2825,7 +2954,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
      // Check against MVT::Other for SELECT_CC, which is a workaround for targets
      // having to say they don't support SELECT_CC on every type the DAG knows
      // about, since there is no way to mark an opcode illegal at all value types
-    if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other))
+    if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
+        TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
        return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
                           N0.getOperand(0), N0.getOperand(1),
                           N1, N2, N0.getOperand(2));
@@ -2945,7 +3075,7 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
  
  SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
    SDValue N0 = N->getOperand(0);
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // fold (sext c1) -> c1
    if (isa<ConstantSDNode>(N0))
@@ -2970,9 +3100,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
      // See if the value being truncated is already sign extended.  If so, just
      // eliminate the trunc/sext pair.
      SDValue Op = N0.getOperand(0);
-    unsigned OpBits   = Op.getValueType().getSizeInBits();
-    unsigned MidBits  = N0.getValueType().getSizeInBits();
-    unsigned DestBits = VT.getSizeInBits();
+    unsigned OpBits   = Op.getValueType().getScalarType().getSizeInBits();
+    unsigned MidBits  = N0.getValueType().getScalarType().getSizeInBits();
+    unsigned DestBits = VT.getScalarType().getSizeInBits();
      unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
  
      if (OpBits == DestBits) {
@@ -2995,9 +3125,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
      // fold (sext (truncate x)) -> (sextinreg x).
      if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                   N0.getValueType())) {
-      if (Op.getValueType().bitsLT(VT))
+      if (OpBits < DestBits)
          Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op);
-      else if (Op.getValueType().bitsGT(VT))
+      else if (OpBits > DestBits)
          Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op);
        return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op,
                           DAG.getValueType(N0.getValueType()));
@@ -3019,7 +3149,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
                                         LN0->getBasePtr(), LN0->getSrcValue(),
                                         LN0->getSrcValueOffset(),
                                         N0.getValueType(),
-                                       LN0->isVolatile(), LN0->getAlignment());
+                                       LN0->isVolatile(), LN0->isNonTemporal(),
+                                       LN0->getAlignment());
        CombineTo(N, ExtLoad);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                                    N0.getValueType(), ExtLoad);
@@ -3054,14 +3185,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
    if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
        ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-    MVT EVT = LN0->getMemoryVT();
+    EVT MemVT = LN0->getMemoryVT();
      if ((!LegalOperations && !LN0->isVolatile()) ||
-        TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT)) {
+        TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
                                         LN0->getChain(),
                                         LN0->getBasePtr(), LN0->getSrcValue(),
-                                       LN0->getSrcValueOffset(), EVT,
-                                       LN0->isVolatile(), LN0->getAlignment());
+                                       LN0->getSrcValueOffset(), MemVT,
+                                       LN0->isVolatile(), LN0->isNonTemporal(),
+                                       LN0->getAlignment());
        CombineTo(N, ExtLoad);
        CombineTo(N0.getNode(),
                  DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
@@ -3071,14 +3203,42 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
      }
    }
  
-  // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
    if (N0.getOpcode() == ISD::SETCC) {
+    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
+    if (VT.isVector() &&
+        // We know that the # elements of the results is the same as the
+        // # elements of the compare (and the # elements of the compare result
+        // for that matter).  Check to see that they are the same size.  If so,
+        // we know that the element size of the sext'd result matches the
+        // element size of the compare operands.
+        VT.getSizeInBits() == N0.getOperand(0).getValueType().getSizeInBits() &&
+      
+        // Only do this before legalize for now.
+        !LegalOperations) {
+      return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+                           N0.getOperand(1),
+                           cast<CondCodeSDNode>(N0.getOperand(2))->get());
+    }
+    
+    // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
+    SDValue NegOne =
+      DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
      SDValue SCC =
        SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
-                       DAG.getConstant(~0ULL, VT), DAG.getConstant(0, VT),
+                       NegOne, DAG.getConstant(0, VT),
                         cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
      if (SCC.getNode()) return SCC;
+    if (!LegalOperations ||
+        TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))
+      return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+                         DAG.getSetCC(N->getDebugLoc(),
+                                      TLI.getSetCCResultType(VT),
+                                      N0.getOperand(0), N0.getOperand(1),
+                                 cast<CondCodeSDNode>(N0.getOperand(2))->get()),
+                         NegOne, DAG.getConstant(0, VT));
    }
+  
+  
  
    // fold (sext x) -> (zext x) if the sign bit is known zero.
    if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
@@ -3090,7 +3250,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  
  SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
    SDValue N0 = N->getOperand(0);
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // fold (zext c1) -> c1
    if (isa<ConstantSDNode>(N0))
@@ -3114,14 +3274,18 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  
    // fold (zext (truncate x)) -> (and x, mask)
    if (N0.getOpcode() == ISD::TRUNCATE &&
-      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
+      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) &&
+      (!TLI.isTruncateFree(N0.getOperand(0).getValueType(),
+                           N0.getValueType()) ||
+       !TLI.isZExtFree(N0.getValueType(), VT))) {
      SDValue Op = N0.getOperand(0);
      if (Op.getValueType().bitsLT(VT)) {
        Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
      } else if (Op.getValueType().bitsGT(VT)) {
        Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
      }
-    return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), N0.getValueType());
+    return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
+                                  N0.getValueType().getScalarType());
    }
  
    // Fold (zext (and (trunc x), cst)) -> (and x, cst),
@@ -3159,7 +3323,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
                                         LN0->getBasePtr(), LN0->getSrcValue(),
                                         LN0->getSrcValueOffset(),
                                         N0.getValueType(),
-                                       LN0->isVolatile(), LN0->getAlignment());
+                                       LN0->isVolatile(), LN0->isNonTemporal(),
+                                       LN0->getAlignment());
        CombineTo(N, ExtLoad);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                                    N0.getValueType(), ExtLoad);
@@ -3194,14 +3359,15 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
    if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
        ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-    MVT EVT = LN0->getMemoryVT();
+    EVT MemVT = LN0->getMemoryVT();
      if ((!LegalOperations && !LN0->isVolatile()) ||
-        TLI.isLoadExtLegal(ISD::ZEXTLOAD, EVT)) {
+        TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
                                         LN0->getChain(),
                                         LN0->getBasePtr(), LN0->getSrcValue(),
-                                       LN0->getSrcValueOffset(), EVT,
-                                       LN0->isVolatile(), LN0->getAlignment());
+                                       LN0->getSrcValueOffset(), MemVT,
+                                       LN0->isVolatile(), LN0->isNonTemporal(),
+                                       LN0->getAlignment());
        CombineTo(N, ExtLoad);
        CombineTo(N0.getNode(),
                  DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(),
@@ -3220,12 +3386,34 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
      if (SCC.getNode()) return SCC;
    }
  
+  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
+  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
+      isa<ConstantSDNode>(N0.getOperand(1)) &&
+      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+      N0.hasOneUse()) {
+    if (N0.getOpcode() == ISD::SHL) {
+      // If the original shl may be shifting out bits, do not perform this
+      // transformation.
+      unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+      unsigned KnownZeroBits = N0.getOperand(0).getValueType().getSizeInBits() -
+        N0.getOperand(0).getOperand(0).getValueType().getSizeInBits();
+      if (ShAmt > KnownZeroBits)
+        return SDValue();
+    }
+    DebugLoc dl = N->getDebugLoc();
+    return DAG.getNode(N0.getOpcode(), dl, VT,
+                       DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)),
+                       DAG.getNode(ISD::ZERO_EXTEND, dl,
+                                   N0.getOperand(1).getValueType(),
+                                   N0.getOperand(1)));
+  }
+
    return SDValue();
  }
  
  SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
    SDValue N0 = N->getOperand(0);
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // fold (aext c1) -> c1
    if (isa<ConstantSDNode>(N0))
@@ -3293,7 +3481,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
                                         LN0->getBasePtr(), LN0->getSrcValue(),
                                         LN0->getSrcValueOffset(),
                                         N0.getValueType(),
-                                       LN0->isVolatile(), LN0->getAlignment());
+                                       LN0->isVolatile(), LN0->isNonTemporal(),
+                                       LN0->getAlignment());
        CombineTo(N, ExtLoad);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                                    N0.getValueType(), ExtLoad);
@@ -3330,12 +3519,13 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
        !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
        N0.hasOneUse()) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-    MVT EVT = LN0->getMemoryVT();
+    EVT MemVT = LN0->getMemoryVT();
      SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       LN0->getSrcValue(),
-                                     LN0->getSrcValueOffset(), EVT,
-                                     LN0->isVolatile(), LN0->getAlignment());
+                                     LN0->getSrcValueOffset(), MemVT,
+                                     LN0->isVolatile(), LN0->isNonTemporal(),
+                                     LN0->getAlignment());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
@@ -3400,31 +3590,32 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
    unsigned Opc = N->getOpcode();
    ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
    SDValue N0 = N->getOperand(0);
-  MVT VT = N->getValueType(0);
-  MVT EVT = VT;
+  EVT VT = N->getValueType(0);
+  EVT ExtVT = VT;
  
    // This transformation isn't valid for vector loads.
    if (VT.isVector())
      return SDValue();
  
-  // Special case: SIGN_EXTEND_INREG is basically truncating to EVT then
+  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
    // extended to VT.
    if (Opc == ISD::SIGN_EXTEND_INREG) {
      ExtType = ISD::SEXTLOAD;
-    EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
-    if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))
+    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+    if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT))
        return SDValue();
    }
  
-  unsigned EVTBits = EVT.getSizeInBits();
+  unsigned EVTBits = ExtVT.getSizeInBits();
    unsigned ShAmt = 0;
-  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) {
      if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
        ShAmt = N01->getZExtValue();
        // Is the shift amount a multiple of size of VT?
        if ((ShAmt & (EVTBits-1)) == 0) {
          N0 = N0.getOperand(0);
-        if (N0.getValueType().getSizeInBits() <= EVTBits)
+        // Is the load width a multiple of size of VT?
+        if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
            return SDValue();
        }
      }
@@ -3432,18 +3623,18 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  
    // Do not generate loads of non-round integer types since these can
    // be expensive (and would be wrong if the type is not byte sized).
-  if (isa<LoadSDNode>(N0) && N0.hasOneUse() && EVT.isRound() &&
+  if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() &&
        cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() > EVTBits &&
        // Do not change the width of a volatile load.
        !cast<LoadSDNode>(N0)->isVolatile()) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
-    MVT PtrType = N0.getOperand(1).getValueType();
+    EVT PtrType = N0.getOperand(1).getValueType();
  
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (TLI.isBigEndian()) {
        unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
-      unsigned EVTStoreBits = EVT.getStoreSizeInBits();
+      unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
        ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
      }
  
@@ -3457,10 +3648,11 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
      SDValue Load = (ExtType == ISD::NON_EXTLOAD)
        ? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
                      LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
-                    LN0->isVolatile(), NewAlign)
+                    LN0->isVolatile(), LN0->isNonTemporal(), NewAlign)
        : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr,
                         LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
-                       EVT, LN0->isVolatile(), NewAlign);
+                       ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+                       NewAlign);
  
      // Replace the old load's chain with the new load's chain.
      WorkListRemover DeadNodes(*this);
@@ -3477,17 +3669,17 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
-  MVT VT = N->getValueType(0);
-  MVT EVT = cast<VTSDNode>(N1)->getVT();
-  unsigned VTBits = VT.getSizeInBits();
-  unsigned EVTBits = EVT.getSizeInBits();
+  EVT VT = N->getValueType(0);
+  EVT EVT = cast<VTSDNode>(N1)->getVT();
+  unsigned VTBits = VT.getScalarType().getSizeInBits();
+  unsigned EVTBits = EVT.getScalarType().getSizeInBits();
  
    // fold (sext_in_reg c1) -> c1
    if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1);
  
    // If the input is already sign extended, just drop the extension.
-  if (DAG.ComputeNumSignBits(N0) >= VT.getSizeInBits()-EVTBits+1)
+  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
      return N0;
  
    // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
@@ -3502,7 +3694,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
    // if x is small enough.
    if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
      SDValue N00 = N0.getOperand(0);
-    if (N00.getValueType().getSizeInBits() < EVTBits)
+    if (N00.getValueType().getScalarType().getSizeInBits() < EVTBits)
        return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
    }
  
@@ -3526,11 +3718,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
    // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
    if (N0.getOpcode() == ISD::SRL) {
      if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
-      if (ShAmt->getZExtValue()+EVTBits <= VT.getSizeInBits()) {
+      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
          // We can turn this into an SRA iff the input to the SRL is already sign
          // extended enough.
          unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
-        if (VT.getSizeInBits()-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
+        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
            return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT,
                               N0.getOperand(0), N0.getOperand(1));
        }
@@ -3547,7 +3739,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
                                       LN0->getChain(),
                                       LN0->getBasePtr(), LN0->getSrcValue(),
                                       LN0->getSrcValueOffset(), EVT,
-                                     LN0->isVolatile(), LN0->getAlignment());
+                                     LN0->isVolatile(), LN0->isNonTemporal(),
+                                     LN0->getAlignment());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
@@ -3563,7 +3756,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
                                       LN0->getChain(),
                                       LN0->getBasePtr(), LN0->getSrcValue(),
                                       LN0->getSrcValueOffset(), EVT,
-                                     LN0->isVolatile(), LN0->getAlignment());
+                                     LN0->isVolatile(), LN0->isNonTemporal(),
+                                     LN0->getAlignment());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
@@ -3573,7 +3767,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  
  SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
    SDValue N0 = N->getOperand(0);
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // noop truncate
    if (N0.getValueType() == N->getValueType(0))
@@ -3596,7 +3790,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
        return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
      else
        // if the source and dest are the same type, we can drop both the extend
-      // and the truncate
+      // and the truncate.
        return N0.getOperand(0);
    }
  
@@ -3623,33 +3817,31 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
  
  /// CombineConsecutiveLoads - build_pair (load, load) -> load
  /// if load locations are consecutive.
-SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) {
+SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
    assert(N->getOpcode() == ISD::BUILD_PAIR);
  
-  SDNode *LD1 = getBuildPairElt(N, 0);
-  if (!ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
+  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
+  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
      return SDValue();
-  MVT LD1VT = LD1->getValueType(0);
-  SDNode *LD2 = getBuildPairElt(N, 1);
-  const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  EVT LD1VT = LD1->getValueType(0);
  
    if (ISD::isNON_EXTLoad(LD2) &&
        LD2->hasOneUse() &&
        // If both are volatile this would reduce the number of volatile loads.
        // If one is volatile it might be ok, but play conservative and bail out.
-      !cast<LoadSDNode>(LD1)->isVolatile() &&
-      !cast<LoadSDNode>(LD2)->isVolatile() &&
-      TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) {
-    LoadSDNode *LD = cast<LoadSDNode>(LD1);
-    unsigned Align = LD->getAlignment();
+      !LD1->isVolatile() &&
+      !LD2->isVolatile() &&
+      DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
+    unsigned Align = LD1->getAlignment();
      unsigned NewAlign = TLI.getTargetData()->
-      getABITypeAlignment(VT.getTypeForMVT());
+      getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
  
      if (NewAlign <= Align &&
          (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
-      return DAG.getLoad(VT, N->getDebugLoc(), LD->getChain(), LD->getBasePtr(),
-                         LD->getSrcValue(), LD->getSrcValueOffset(),
-                         false, Align);
+      return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
+                         LD1->getBasePtr(), LD1->getSrcValue(),
+                         LD1->getSrcValueOffset(), false, false, Align);
    }
  
    return SDValue();
@@ -3657,7 +3849,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) {
  
  SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
    SDValue N0 = N->getOperand(0);
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // If the input is a BUILD_VECTOR with all constant elements, fold this now.
    // Only do this before legalize, since afterward the target may be depending
@@ -3675,7 +3867,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
          break;
        }
  
-    MVT DestEltVT = N->getValueType(0).getVectorElementType();
+    EVT DestEltVT = N->getValueType(0).getVectorElementType();
      assert(!DestEltVT.isVector() &&
             "Element type of vector ValueType must not be vector!");
      if (isSimple)
@@ -3685,7 +3877,18 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
    // If the input is a constant, let getNode fold it.
    if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
      SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0);
-    if (Res.getNode() != N) return Res;
+    if (Res.getNode() != N) {
+      if (!LegalOperations ||
+          TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
+        return Res;
+
+      // Folding it resulted in an illegal node, and it's too late to
+      // do that. Clean up the old node and forego the transformation.
+      // Ideally this won't happen very often, because instcombine
+      // and the earlier dagcombine runs (where illegal nodes are
+      // permitted) should have folded most of them already.
+      DAG.DeleteNode(Res.getNode());
+    }
    }
  
    // (conv (conv x, t1), t2) -> (conv x, t2)
@@ -3701,14 +3904,15 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      unsigned Align = TLI.getTargetData()->
-      getABITypeAlignment(VT.getTypeForMVT());
+      getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
      unsigned OrigAlign = LN0->getAlignment();
  
      if (Align <= OrigAlign) {
        SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
                                   LN0->getBasePtr(),
                                   LN0->getSrcValue(), LN0->getSrcValueOffset(),
-                                 LN0->isVolatile(), OrigAlign);
+                                 LN0->isVolatile(), LN0->isNonTemporal(),
+                                 OrigAlign);
        AddToWorkList(N);
        CombineTo(N0.getNode(),
                  DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
@@ -3744,7 +3948,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
        isa<ConstantFPSDNode>(N0.getOperand(0)) &&
        VT.isInteger() && !VT.isVector()) {
      unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
-    MVT IntXVT = MVT::getIntegerVT(OrigXWidth);
+    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
      if (TLI.isTypeLegal(IntXVT) || !LegalTypes) {
        SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
                                IntXVT, N0.getOperand(1));
@@ -3792,7 +3996,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
  }
  
  SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
    return CombineConsecutiveLoads(N, VT);
  }
  
@@ -3800,8 +4004,8 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
  /// node with Constant, ConstantFP or Undef operands.  DstEltVT indicates the
  /// destination element value type.
  SDValue DAGCombiner::
-ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) {
-  MVT SrcEltVT = BV->getValueType(0).getVectorElementType();
+ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
+  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
  
    // If this is already the right type, we're done.
    if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
@@ -3823,7 +4027,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) {
                                  DstEltVT, Op));
        AddToWorkList(Ops.back().getNode());
      }
-    MVT VT = MVT::getVectorVT(DstEltVT,
+    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                                BV->getValueType(0).getVectorNumElements());
      return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
                         &Ops[0], Ops.size());
@@ -3836,7 +4040,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) {
      // Convert the input float vector to a int vector where the elements are the
      // same sizes.
      assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
-    MVT IntVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits());
+    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
      BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode();
      SrcEltVT = IntVT;
    }
@@ -3845,7 +4049,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) {
    // convert to integer first, then to FP of the right size.
    if (DstEltVT.isFloatingPoint()) {
      assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
-    MVT TmpVT = MVT::getIntegerVT(DstEltVT.getSizeInBits());
+    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
      SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode();
  
      // Next, convert to FP elements of the same size.
@@ -3881,7 +4085,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) {
          Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
      }
  
-    MVT VT = MVT::getVectorVT(DstEltVT, Ops.size());
+    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
      return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
                         &Ops[0], Ops.size());
    }
@@ -3890,7 +4094,8 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, MVT DstEltVT) {
    // turns into multiple outputs.
    bool isS2V = ISD::isScalarToVector(BV);
    unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
-  MVT VT = MVT::getVectorVT(DstEltVT, NumOutputsPerInput*BV->getNumOperands());
+  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+                            NumOutputsPerInput*BV->getNumOperands());
    SmallVector<SDValue, 8> Ops;
  
    for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
@@ -3927,7 +4132,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
    SDValue N1 = N->getOperand(1);
    ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
    ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // fold vector ops
    if (VT.isVector()) {
@@ -3968,7 +4173,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
    SDValue N1 = N->getOperand(1);
    ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
    ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // fold vector ops
    if (VT.isVector()) {
@@ -4002,7 +4207,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
    SDValue N1 = N->getOperand(1);
    ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
    ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // fold vector ops
    if (VT.isVector()) {
@@ -4019,10 +4224,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
    // fold (fmul A, 0) -> 0
    if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
      return N1;
+  // fold (fmul A, 0) -> 0, vector edition.
+  if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode()))
+    return N1;
    // fold (fmul X, 2.0) -> (fadd X, X)
    if (N1CFP && N1CFP->isExactlyValue(+2.0))
      return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
-  // fold (fmul X, (fneg 1.0)) -> (fneg X)
+  // fold (fmul X, -1.0) -> (fneg X)
    if (N1CFP && N1CFP->isExactlyValue(-1.0))
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
@@ -4054,7 +4262,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
    SDValue N1 = N->getOperand(1);
    ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
    ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // fold vector ops
    if (VT.isVector()) {
@@ -4087,7 +4295,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
    SDValue N1 = N->getOperand(1);
    ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
    ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // fold (frem c1, c2) -> fmod(c1,c2)
    if (N0CFP && N1CFP && VT != MVT::ppcf128)
@@ -4101,7 +4309,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
    SDValue N1 = N->getOperand(1);
    ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
    ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    if (N0CFP && N1CFP && VT != MVT::ppcf128)  // Constant fold
      return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1);
@@ -4149,8 +4357,8 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
  SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
-  MVT VT = N->getValueType(0);
-  MVT OpVT = N0.getValueType();
+  EVT VT = N->getValueType(0);
+  EVT OpVT = N0.getValueType();
  
    // fold (sint_to_fp c1) -> c1fp
    if (N0C && OpVT != MVT::ppcf128)
@@ -4171,8 +4379,8 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
-  MVT VT = N->getValueType(0);
-  MVT OpVT = N0.getValueType();
+  EVT VT = N->getValueType(0);
+  EVT OpVT = N0.getValueType();
  
    // fold (uint_to_fp c1) -> c1fp
    if (N0C && OpVT != MVT::ppcf128)
@@ -4193,7 +4401,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // fold (fp_to_sint c1fp) -> c1
    if (N0CFP)
@@ -4205,7 +4413,7 @@ SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
  SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // fold (fp_to_uint c1fp) -> c1
    if (N0CFP && VT != MVT::ppcf128)
@@ -4218,7 +4426,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // fold (fp_round c1fp) -> c1fp
    if (N0CFP && N0.getValueType() != MVT::ppcf128)
@@ -4251,8 +4459,8 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  
  SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
    SDValue N0 = N->getOperand(0);
-  MVT VT = N->getValueType(0);
-  MVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+  EVT VT = N->getValueType(0);
+  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
    ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  
    // fold (fp_round_inreg c1fp) -> c1fp
@@ -4267,7 +4475,7 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
  SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
    if (N->hasOneUse() &&
@@ -4300,7 +4508,8 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
                                       LN0->getBasePtr(), LN0->getSrcValue(),
                                       LN0->getSrcValueOffset(),
                                       N0.getValueType(),
-                                     LN0->isVolatile(), LN0->getAlignment());
+                                     LN0->isVolatile(), LN0->isNonTemporal(),
+                                     LN0->getAlignment());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(),
@@ -4314,23 +4523,25 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  
  SDValue DAGCombiner::visitFNEG(SDNode *N) {
    SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
  
    if (isNegatibleForFree(N0, LegalOperations))
      return GetNegatedExpression(N0, DAG, LegalOperations);
  
    // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
    // constant pool values.
-  if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() &&
-      N0.getOperand(0).getValueType().isInteger() &&
-      !N0.getOperand(0).getValueType().isVector()) {
+  if (N0.getOpcode() == ISD::BIT_CONVERT && 
+      !VT.isVector() &&
+      N0.getNode()->hasOneUse() &&
+      N0.getOperand(0).getValueType().isInteger()) {
      SDValue Int = N0.getOperand(0);
-    MVT IntVT = Int.getValueType();
+    EVT IntVT = Int.getValueType();
      if (IntVT.isInteger() && !IntVT.isVector()) {
        Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,
                DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
        AddToWorkList(Int.getNode());
        return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
-                         N->getValueType(0), Int);
+                         VT, Int);
      }
    }
  
@@ -4340,7 +4551,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue DAGCombiner::visitFABS(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
  
    // fold (fabs c1) -> fabs(c1)
    if (N0CFP && VT != MVT::ppcf128)
@@ -4359,7 +4570,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
        N0.getOperand(0).getValueType().isInteger() &&
        !N0.getOperand(0).getValueType().isVector()) {
      SDValue Int = N0.getOperand(0);
-    MVT IntVT = Int.getValueType();
+    EVT IntVT = Int.getValueType();
      if (IntVT.isInteger() && !IntVT.isVector()) {
        Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,
               DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
@@ -4376,14 +4587,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
    SDValue Chain = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    SDValue N2 = N->getOperand(2);
-  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  
-  // never taken branch, fold to chain
-  if (N1C && N1C->isNullValue())
-    return Chain;
-  // unconditional branch
-  if (N1C && N1C->getAPIntValue() == 1)
-    return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, Chain, N2);
+  // If N is a constant we could fold this into a fallthrough or unconditional
+  // branch. However that doesn't happen very often in normal code, because
+  // Instcombine/SimplifyCFG should have handled the available opportunities.
+  // If we did this folding here, it would be necessary to update the
+  // MachineBasicBlock CFG, which is awkward.
+
    // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
    // on the target.
    if (N1.getOpcode() == ISD::SETCC &&
@@ -4393,6 +4603,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
                         N1.getOperand(0), N1.getOperand(1), N2);
    }
  
+  SDNode *Trunc = 0;
+  if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) {
+    // Look pass truncate.
+    Trunc = N1.getNode();
+    N1 = N1.getOperand(0);
+  }
+
    if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) {
      // Match this pattern so that we can generate simpler code:
      //
@@ -4404,7 +4621,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
      // into
      // 
      //   %a = ...
-    //   %b = and %a, 2
+    //   %b = and i32 %a, 2
      //   %c = setcc eq %b, 0
      //   brcond %c ...
      //
@@ -4415,9 +4632,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
      SDValue Op1 = N1.getOperand(1);
  
      if (Op0.getOpcode() == ISD::AND &&
-        Op0.hasOneUse() &&
          Op1.getOpcode() == ISD::Constant) {
-      SDValue AndOp0 = Op0.getOperand(0);
        SDValue AndOp1 = Op0.getOperand(1);
  
        if (AndOp1.getOpcode() == ISD::Constant) {
@@ -4431,16 +4646,76 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
                           Op0, DAG.getConstant(0, Op0.getValueType()),
                           ISD::SETNE);
  
+          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+                                          MVT::Other, Chain, SetCC, N2);
+          // Don't add the new BRCond into the worklist or else SimplifySelectCC
+          // will convert it back to (X & C1) >> C2.
+          CombineTo(N, NewBRCond, false);
+          // Truncate is dead.
+          if (Trunc) {
+            removeFromWorkList(Trunc);
+            DAG.DeleteNode(Trunc);
+          }
            // Replace the uses of SRL with SETCC
-          DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
+          WorkListRemover DeadNodes(*this);
+          DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
            removeFromWorkList(N1.getNode());
            DAG.DeleteNode(N1.getNode());
-          return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
-                             MVT::Other, Chain, SetCC, N2);
+          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
          }
        }
      }
    }
+  
+  // Transform br(xor(x, y)) -> br(x != y)
+  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
+  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
+    SDNode *TheXor = N1.getNode();
+    SDValue Op0 = TheXor->getOperand(0);
+    SDValue Op1 = TheXor->getOperand(1);
+    if (Op0.getOpcode() == Op1.getOpcode()) {
+      // Avoid missing important xor optimizations.
+      SDValue Tmp = visitXOR(TheXor);
+      if (Tmp.getNode()) {
+        DEBUG(dbgs() << "\nReplacing.8 ";
+              TheXor->dump(&DAG);
+              dbgs() << "\nWith: ";
+              Tmp.getNode()->dump(&DAG);
+              dbgs() << '\n');
+        WorkListRemover DeadNodes(*this);
+        DAG.ReplaceAllUsesOfValueWith(N1, Tmp, &DeadNodes);
+        removeFromWorkList(TheXor);
+        DAG.DeleteNode(TheXor);
+        return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+                           MVT::Other, Chain, Tmp, N2);
+      }
+    }
+
+    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
+      bool Equal = false;
+      if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
+        if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
+            Op0.getOpcode() == ISD::XOR) {
+          TheXor = Op0.getNode();
+          Equal = true;
+        }
+
+      EVT SetCCVT = N1.getValueType();
+      if (LegalTypes)
+        SetCCVT = TLI.getSetCCResultType(SetCCVT);
+      SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(),
+                                   SetCCVT,
+                                   Op0, Op1,
+                                   Equal ? ISD::SETEQ : ISD::SETNE);
+      // Replace the uses of XOR with SETCC
+      WorkListRemover DeadNodes(*this);
+      DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
+      removeFromWorkList(N1.getNode());
+      DAG.DeleteNode(N1.getNode());
+      return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+                         MVT::Other, Chain, SetCC, N2);
+    }
+  }
  
    return SDValue();
  }
@@ -4451,22 +4726,18 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
    CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
    SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
  
+  // If N is a constant we could fold this into a fallthrough or unconditional
+  // branch. However that doesn't happen very often in normal code, because
+  // Instcombine/SimplifyCFG should have handled the available opportunities.
+  // If we did this folding here, it would be necessary to update the
+  // MachineBasicBlock CFG, which is awkward.
+
    // Use SimplifySetCC to simplify SETCC's.
    SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()),
                                 CondLHS, CondRHS, CC->get(), N->getDebugLoc(),
                                 false);
    if (Simp.getNode()) AddToWorkList(Simp.getNode());
  
-  ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(Simp.getNode());
-
-  // fold br_cc true, dest -> br dest (unconditional branch)
-  if (SCCC && !SCCC->isNullValue())
-    return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other,
-                       N->getOperand(0), N->getOperand(4));
-  // fold br_cc false, dest -> unconditional fall through
-  if (SCCC && SCCC->isNullValue())
-    return N->getOperand(0);
-
    // fold to a simpler setcc
    if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
      return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
@@ -4489,7 +4760,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  
    bool isLoad = true;
    SDValue Ptr;
-  MVT VT;
+  EVT VT;
    if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
      if (LD->isIndexed())
        return false;
@@ -4577,9 +4848,11 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
                                   BasePtr, Offset, AM);
    ++PreIndexedNodes;
    ++NodesCombined;
-  DOUT << "\nReplacing.4 "; DEBUG(N->dump(&DAG));
-  DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG));
-  DOUT << '\n';
+  DEBUG(dbgs() << "\nReplacing.4 ";
+        N->dump(&DAG);
+        dbgs() << "\nWith: ";
+        Result.getNode()->dump(&DAG);
+        dbgs() << '\n');
    WorkListRemover DeadNodes(*this);
    if (isLoad) {
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
@@ -4614,7 +4887,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  
    bool isLoad = true;
    SDValue Ptr;
-  MVT VT;
+  EVT VT;
    if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
      if (LD->isIndexed())
        return false;
@@ -4650,7 +4923,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
      SDValue Offset;
      ISD::MemIndexedMode AM = ISD::UNINDEXED;
      if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
-      if (Ptr == Offset)
+      if (Ptr == Offset && Op->getOpcode() == ISD::ADD)
          std::swap(BasePtr, Offset);
        if (Ptr != BasePtr)
          continue;
@@ -4709,9 +4982,11 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
                                  BasePtr, Offset, AM);
          ++PostIndexedNodes;
          ++NodesCombined;
-        DOUT << "\nReplacing.5 "; DEBUG(N->dump(&DAG));
-        DOUT << "\nWith: "; DEBUG(Result.getNode()->dump(&DAG));
-        DOUT << '\n';
+        DEBUG(dbgs() << "\nReplacing.5 ";
+              N->dump(&DAG);
+              dbgs() << "\nWith: ";
+              Result.getNode()->dump(&DAG);
+              dbgs() << '\n');
          WorkListRemover DeadNodes(*this);
          if (isLoad) {
            DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
@@ -4740,49 +5015,6 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
    return false;
  }
  
-/// InferAlignment - If we can infer some alignment information from this
-/// pointer, return it.
-static unsigned InferAlignment(SDValue Ptr, SelectionDAG &DAG) {
-  // If this is a direct reference to a stack slot, use information about the
-  // stack slot's alignment.
-  int FrameIdx = 1 << 31;
-  int64_t FrameOffset = 0;
-  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
-    FrameIdx = FI->getIndex();
-  } else if (Ptr.getOpcode() == ISD::ADD &&
-             isa<ConstantSDNode>(Ptr.getOperand(1)) &&
-             isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
-    FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
-    FrameOffset = Ptr.getConstantOperandVal(1);
-  }
-
-  if (FrameIdx != (1 << 31)) {
-    // FIXME: Handle FI+CST.
-    const MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo();
-    if (MFI.isFixedObjectIndex(FrameIdx)) {
-      int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset;
-
-      // The alignment of the frame index can be determined from its offset from
-      // the incoming frame position.  If the frame object is at offset 32 and
-      // the stack is guaranteed to be 16-byte aligned, then we know that the
-      // object is 16-byte aligned.
-      unsigned StackAlign = DAG.getTarget().getFrameInfo()->getStackAlignment();
-      unsigned Align = MinAlign(ObjectOffset, StackAlign);
-
-      // Finally, the frame object itself may have a known alignment.  Factor
-      // the alignment + offset into a new alignment.  For example, if we know
-      // the  FI is 8 byte aligned, but the pointer is 4 off, we really have a
-      // 4-byte alignment of the resultant pointer.  Likewise align 4 + 4-byte
-      // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc.
-      unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
-                                      FrameOffset);
-      return std::max(Align, FIInfoAlign);
-    }
-  }
-
-  return 0;
-}
-
  SDValue DAGCombiner::visitLOAD(SDNode *N) {
    LoadSDNode *LD  = cast<LoadSDNode>(N);
    SDValue Chain = LD->getChain();
@@ -4790,13 +5022,13 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
  
    // Try to infer better alignment information than the load already has.
    if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
-    if (unsigned Align = InferAlignment(Ptr, DAG)) {
+    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
        if (Align > LD->getAlignment())
          return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
                                LD->getValueType(0),
                                Chain, Ptr, LD->getSrcValue(),
                                LD->getSrcValueOffset(), LD->getMemoryVT(),
-                              LD->isVolatile(), Align);
+                              LD->isVolatile(), LD->isNonTemporal(), Align);
      }
    }
  
@@ -4813,9 +5045,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
          // v3         = add v2, c
          // Now we replace use of chain2 with chain1.  This makes the second load
          // isomorphic to the one we are deleting, and thus makes this load live.
-        DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG));
-        DOUT << "\nWith chain: "; DEBUG(Chain.getNode()->dump(&DAG));
-        DOUT << "\n";
+        DEBUG(dbgs() << "\nReplacing.6 ";
+              N->dump(&DAG);
+              dbgs() << "\nWith chain: ";
+              Chain.getNode()->dump(&DAG);
+              dbgs() << "\n");
          WorkListRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes);
  
@@ -4831,9 +5065,11 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
        assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
        if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
          SDValue Undef = DAG.getUNDEF(N->getValueType(0));
-        DOUT << "\nReplacing.6 "; DEBUG(N->dump(&DAG));
-        DOUT << "\nWith: "; DEBUG(Undef.getNode()->dump(&DAG));
-        DOUT << " and 2 other values\n";
+        DEBUG(dbgs() << "\nReplacing.7 ";
+              N->dump(&DAG);
+              dbgs() << "\nWith: ";
+              Undef.getNode()->dump(&DAG);
+              dbgs() << " and 2 other values\n");
          WorkListRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes);
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
@@ -4874,7 +5110,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
          ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
                                 BetterChain, Ptr,
                                 LD->getSrcValue(), LD->getSrcValueOffset(),
-                               LD->isVolatile(), LD->getAlignment());
+                               LD->isVolatile(), LD->isNonTemporal(),
+                               LD->getAlignment());
        } else {
          ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
                                    LD->getValueType(0),
@@ -4882,13 +5119,17 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
                                    LD->getSrcValueOffset(),
                                    LD->getMemoryVT(),
                                    LD->isVolatile(),
+                                  LD->isNonTemporal(),
                                    LD->getAlignment());
        }
  
        // Create token factor to keep old chain connected.
        SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
                                    MVT::Other, Chain, ReplLoad.getValue(1));
-
+      
+      // Make sure the new and old chains are cleaned up.
+      AddToWorkList(Token.getNode());
+      
        // Replace uses with load result and token factor. Don't add users
        // to work list.
        return CombineTo(N, ReplLoad.getValue(0), Token, false);
@@ -4915,7 +5156,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
    SDValue Chain = ST->getChain();
    SDValue Value = ST->getValue();
    SDValue Ptr   = ST->getBasePtr();
-  MVT VT = Value.getValueType();
+  EVT VT = Value.getValueType();
  
    if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
      return SDValue();
@@ -4937,15 +5178,17 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
      APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
      if (Opc == ISD::AND)
        Imm ^= APInt::getAllOnesValue(BitWidth);
+    if (Imm == 0 || Imm.isAllOnesValue())
+      return SDValue();
      unsigned ShAmt = Imm.countTrailingZeros();
      unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
      unsigned NewBW = NextPowerOf2(MSB - ShAmt);
-    MVT NewVT = MVT::getIntegerVT(NewBW);
+    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
      while (NewBW < BitWidth &&
             !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
               TLI.isNarrowingProfitable(VT, NewVT))) {
        NewBW = NextPowerOf2(NewBW);
-      NewVT = MVT::getIntegerVT(NewBW);
+      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
      }
      if (NewBW >= BitWidth)
        return SDValue();
@@ -4967,7 +5210,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  
        unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
        if (NewAlign <
-          TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForMVT()))
+          TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForEVT(*DAG.getContext())))
          return SDValue();
  
        SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
@@ -4976,13 +5219,14 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
        SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
                                    LD->getChain(), NewPtr,
                                    LD->getSrcValue(), LD->getSrcValueOffset(),
-                                  LD->isVolatile(), NewAlign);
+                                  LD->isVolatile(), LD->isNonTemporal(),
+                                  NewAlign);
        SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
                                     DAG.getConstant(NewImm, NewVT));
        SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
                                     NewVal, NewPtr,
                                     ST->getSrcValue(), ST->getSrcValueOffset(),
-                                   false, NewAlign);
+                                   false, false, NewAlign);
  
        AddToWorkList(NewPtr.getNode());
        AddToWorkList(NewLD.getNode());
@@ -5006,12 +5250,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
  
    // Try to infer better alignment information than the store already has.
    if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
-    if (unsigned Align = InferAlignment(Ptr, DAG)) {
+    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
        if (Align > ST->getAlignment())
          return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
                                   Ptr, ST->getSrcValue(),
                                   ST->getSrcValueOffset(), ST->getMemoryVT(),
-                                 ST->isVolatile(), Align);
+                                 ST->isVolatile(), ST->isNonTemporal(), Align);
      }
    }
  
@@ -5020,15 +5264,16 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
    if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
        ST->isUnindexed()) {
      unsigned OrigAlign = ST->getAlignment();
-    MVT SVT = Value.getOperand(0).getValueType();
+    EVT SVT = Value.getOperand(0).getValueType();
      unsigned Align = TLI.getTargetData()->
-      getABITypeAlignment(SVT.getTypeForMVT());
+      getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
      if (Align <= OrigAlign &&
          ((!LegalOperations && !ST->isVolatile()) ||
           TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
        return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
                            Ptr, ST->getSrcValue(),
-                          ST->getSrcValueOffset(), ST->isVolatile(), OrigAlign);
+                          ST->getSrcValueOffset(), ST->isVolatile(),
+                          ST->isNonTemporal(), OrigAlign);
    }
  
    // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
@@ -5039,8 +5284,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
      // transform should not be done in this case.
      if (Value.getOpcode() != ISD::TargetConstantFP) {
        SDValue Tmp;
-      switch (CFP->getValueType(0).getSimpleVT()) {
-      default: assert(0 && "Unknown FP type");
+      switch (CFP->getValueType(0).getSimpleVT().SimpleTy) {
+      default: llvm_unreachable("Unknown FP type");
        case MVT::f80:    // We don't do this for these yet.
        case MVT::f128:
        case MVT::ppcf128:
@@ -5054,7 +5299,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
            return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
                                Ptr, ST->getSrcValue(),
                                ST->getSrcValueOffset(), ST->isVolatile(),
-                              ST->getAlignment());
+                              ST->isNonTemporal(), ST->getAlignment());
          }
          break;
        case MVT::f64:
@@ -5066,7 +5311,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
            return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
                                Ptr, ST->getSrcValue(),
                                ST->getSrcValueOffset(), ST->isVolatile(),
-                              ST->getAlignment());
+                              ST->isNonTemporal(), ST->getAlignment());
          } else if (!ST->isVolatile() &&
                     TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
            // Many FP stores are not made apparent until after legalize, e.g. for
@@ -5080,18 +5325,21 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
            int SVOffset = ST->getSrcValueOffset();
            unsigned Alignment = ST->getAlignment();
            bool isVolatile = ST->isVolatile();
+          bool isNonTemporal = ST->isNonTemporal();
  
            SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
                                       Ptr, ST->getSrcValue(),
                                       ST->getSrcValueOffset(),
-                                     isVolatile, ST->getAlignment());
+                                     isVolatile, isNonTemporal,
+                                     ST->getAlignment());
            Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
                              DAG.getConstant(4, Ptr.getValueType()));
            SVOffset += 4;
            Alignment = MinAlign(Alignment, 4U);
            SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
                                       Ptr, ST->getSrcValue(),
-                                     SVOffset, isVolatile, Alignment);
+                                     SVOffset, isVolatile, isNonTemporal,
+                                     Alignment);
            return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
                               St0, St1);
          }
@@ -5107,23 +5355,28 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
  
      // If there is a better chain.
      if (Chain != BetterChain) {
-      // Replace the chain to avoid dependency.
        SDValue ReplStore;
+
+      // Replace the chain to avoid dependency.
        if (ST->isTruncatingStore()) {
          ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
                                        ST->getSrcValue(),ST->getSrcValueOffset(),
-                                      ST->getMemoryVT(),
-                                      ST->isVolatile(), ST->getAlignment());
+                                      ST->getMemoryVT(), ST->isVolatile(),
+                                      ST->isNonTemporal(), ST->getAlignment());
        } else {
          ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
                                   ST->getSrcValue(), ST->getSrcValueOffset(),
-                                 ST->isVolatile(), ST->getAlignment());
+                                 ST->isVolatile(), ST->isNonTemporal(),
+                                 ST->getAlignment());
        }
  
        // Create token to keep both nodes around.
        SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
                                    MVT::Other, Chain, ReplStore);
  
+      // Make sure the new and old chains are cleaned up.
+      AddToWorkList(Token.getNode());
+
        // Don't add users to work list.
        return CombineTo(N, Token, false);
      }
@@ -5148,13 +5401,14 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
        return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
                                 Ptr, ST->getSrcValue(),
                                 ST->getSrcValueOffset(), ST->getMemoryVT(),
-                               ST->isVolatile(), ST->getAlignment());
+                               ST->isVolatile(), ST->isNonTemporal(),
+                               ST->getAlignment());
  
      // Otherwise, see if we can simplify the operation with
      // SimplifyDemandedBits, which only works if the value has a single use.
      if (SimplifyDemandedBits(Value,
                               APInt::getLowBitsSet(
-                               Value.getValueSizeInBits(),
+                               Value.getValueType().getScalarType().getSizeInBits(),
                                 ST->getMemoryVT().getSizeInBits())))
        return SDValue(N, 0);
    }
@@ -5181,7 +5435,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
      return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
                               Ptr, ST->getSrcValue(),
                               ST->getSrcValueOffset(), ST->getMemoryVT(),
-                             ST->isVolatile(), ST->getAlignment());
+                             ST->isVolatile(), ST->isNonTemporal(),
+                             ST->getAlignment());
    }
  
    return ReduceLoadOpStoreWidth(N);
@@ -5207,10 +5462,10 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
    // BUILD_VECTOR with undef elements and the inserted element.
    if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF && 
        isa<ConstantSDNode>(EltNo)) {
-    MVT VT = InVec.getValueType();
-    MVT EVT = VT.getVectorElementType();
+    EVT VT = InVec.getValueType();
+    EVT EltVT = VT.getVectorElementType();
      unsigned NElts = VT.getVectorNumElements();
-    SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EVT));
+    SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT));
  
      unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      if (Elt < Ops.size())
@@ -5226,12 +5481,16 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
    SDValue InVec = N->getOperand(0);
  
   if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
-   // If the operand is wider than the vector element type then it is implicitly
-   // truncated.  Make that explicit here.
-   MVT EltVT = InVec.getValueType().getVectorElementType();
+   // Check if the result type doesn't match the inserted element type. A
+   // SCALAR_TO_VECTOR may truncate the inserted element and the
+   // EXTRACT_VECTOR_ELT may widen the extracted vector.
+   EVT EltVT = InVec.getValueType().getVectorElementType();
     SDValue InOp = InVec.getOperand(0);
-   if (InOp.getValueType() != EltVT)
-     return DAG.getNode(ISD::TRUNCATE, InVec.getDebugLoc(), EltVT, InOp);
+   EVT NVT = N->getValueType(0);
+   if (InOp.getValueType() != NVT) {
+     assert(InOp.getValueType().isInteger() && NVT.isInteger());
+     return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
+   }
     return InOp;
   }
  
@@ -5248,18 +5507,18 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
      unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      bool NewLoad = false;
      bool BCNumEltsChanged = false;
-    MVT VT = InVec.getValueType();
-    MVT EVT = VT.getVectorElementType();
-    MVT LVT = EVT;
+    EVT VT = InVec.getValueType();
+    EVT ExtVT = VT.getVectorElementType();
+    EVT LVT = ExtVT;
  
      if (InVec.getOpcode() == ISD::BIT_CONVERT) {
-      MVT BCVT = InVec.getOperand(0).getValueType();
-      if (!BCVT.isVector() || EVT.bitsGT(BCVT.getVectorElementType()))
+      EVT BCVT = InVec.getOperand(0).getValueType();
+      if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
          return SDValue();
        if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
          BCNumEltsChanged = true;
        InVec = InVec.getOperand(0);
-      EVT = BCVT.getVectorElementType();
+      ExtVT = BCVT.getVectorElementType();
        NewLoad = true;
      }
  
@@ -5268,7 +5527,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
      if (ISD::isNormalLoad(InVec.getNode())) {
        LN0 = cast<LoadSDNode>(InVec);
      } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
-               InVec.getOperand(0).getValueType() == EVT &&
+               InVec.getOperand(0).getValueType() == ExtVT &&
                 ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
        LN0 = cast<LoadSDNode>(InVec.getOperand(0));
      } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
@@ -5302,7 +5561,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
        // Check the resultant load doesn't need a higher alignment than the
        // original load.
        unsigned NewAlign =
-        TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForMVT());
+        TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
  
        if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
          return SDValue();
@@ -5313,7 +5572,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
      SDValue NewPtr = LN0->getBasePtr();
      if (Elt) {
        unsigned PtrOff = LVT.getSizeInBits() * Elt / 8;
-      MVT PtrType = NewPtr.getValueType();
+      EVT PtrType = NewPtr.getValueType();
        if (TLI.isBigEndian())
          PtrOff = VT.getSizeInBits() / 8 - PtrOff;
        NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr,
@@ -5322,7 +5581,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  
      return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
                         LN0->getSrcValue(), LN0->getSrcValueOffset(),
-                       LN0->isVolatile(), Align);
+                       LN0->isVolatile(), LN0->isNonTemporal(), Align);
    }
  
    return SDValue();
@@ -5330,8 +5589,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  
  SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
    unsigned NumInScalars = N->getNumOperands();
-  MVT VT = N->getValueType(0);
-  MVT EltType = VT.getVectorElementType();
+  EVT VT = N->getValueType(0);
  
    // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
    // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
@@ -5428,11 +5686,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
    return SDValue();
    
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
    unsigned NumElts = VT.getVectorNumElements();
  
    SDValue N0 = N->getOperand(0);
-  SDValue N1 = N->getOperand(1);
  
    assert(N0.getValueType().getVectorNumElements() == NumElts &&
          "Vector shuffle must be normalized in DAG");
@@ -5490,7 +5747,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
  ///      vector_shuffle V, Zero, <0, 4, 2, 4>
  SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
    DebugLoc dl = N->getDebugLoc();
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
@@ -5513,14 +5770,14 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
        }
  
        // Let's see if the target supports this vector_shuffle.
-      MVT RVT = RHS.getValueType();
+      EVT RVT = RHS.getValueType();
        if (!TLI.isVectorClearMaskLegal(Indices, RVT))
          return SDValue();
  
        // Return the new VECTOR_SHUFFLE node.
-      MVT EVT = RVT.getVectorElementType();
+      EVT EltVT = RVT.getVectorElementType();
        SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
-                                     DAG.getConstant(0, EVT));
+                                     DAG.getConstant(0, EltVT));
        SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
                                   RVT, &ZeroOps[0], ZeroOps.size());
        LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS);
@@ -5539,10 +5796,10 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
    // things. Simplifying them may result in a loss of legality.
    if (LegalOperations) return SDValue();
  
-  MVT VT = N->getValueType(0);
+  EVT VT = N->getValueType(0);
    assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
  
-  MVT EltType = VT.getVectorElementType();
+  EVT EltType = VT.getVectorElementType();
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    SDValue Shuffle = XformToShuffleWithZero(N);
@@ -5585,7 +5842,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
      }
  
      if (Ops.size() == LHS.getNumOperands()) {
-      MVT VT = LHS.getValueType();
+      EVT VT = LHS.getValueType();
        return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
                           &Ops[0], Ops.size());
      }
@@ -5649,33 +5906,50 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
  
        // If this is an EXTLOAD, the VT's must match.
        if (LLD->getMemoryVT() == RLD->getMemoryVT()) {
-        // FIXME: this conflates two src values, discarding one.  This is not
-        // the right thing to do, but nothing uses srcvalues now.  When they do,
-        // turn SrcValue into a list of locations.
+        // FIXME: this discards src value information.  This is
+        // over-conservative. It would be beneficial to be able to remember
+        // both potential memory locations.  Since we are discarding
+        // src value info, don't do the transformation if the memory
+        // locations are not in the default address space.
+        unsigned LLDAddrSpace = 0, RLDAddrSpace = 0;
+        if (const Value *LLDVal = LLD->getMemOperand()->getValue()) {
+          if (const PointerType *PT = dyn_cast<PointerType>(LLDVal->getType()))
+            LLDAddrSpace = PT->getAddressSpace();
+        }
+        if (const Value *RLDVal = RLD->getMemOperand()->getValue()) {
+          if (const PointerType *PT = dyn_cast<PointerType>(RLDVal->getType()))
+            RLDAddrSpace = PT->getAddressSpace();
+        }
          SDValue Addr;
-        if (TheSelect->getOpcode() == ISD::SELECT) {
-          // Check that the condition doesn't reach either load.  If so, folding
-          // this will induce a cycle into the DAG.
-          if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
-              !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) {
-            Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
-                               LLD->getBasePtr().getValueType(),
-                               TheSelect->getOperand(0), LLD->getBasePtr(),
-                               RLD->getBasePtr());
-          }
-        } else {
-          // Check that the condition doesn't reach either load.  If so, folding
-          // this will induce a cycle into the DAG.
-          if (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
-              !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
-              !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()) &&
-              !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())) {
-            Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
-                               LLD->getBasePtr().getValueType(),
-                               TheSelect->getOperand(0),
-                               TheSelect->getOperand(1),
-                               LLD->getBasePtr(), RLD->getBasePtr(),
-                               TheSelect->getOperand(4));
+        if (LLDAddrSpace == 0 && RLDAddrSpace == 0) {
+          if (TheSelect->getOpcode() == ISD::SELECT) {
+            // Check that the condition doesn't reach either load.  If so, folding
+            // this will induce a cycle into the DAG.
+            if ((!LLD->hasAnyUseOfValue(1) ||
+                 !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) &&
+                (!RLD->hasAnyUseOfValue(1) ||
+                 !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) {
+              Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
+                                 LLD->getBasePtr().getValueType(),
+                                 TheSelect->getOperand(0), LLD->getBasePtr(),
+                                 RLD->getBasePtr());
+            }
+          } else {
+            // Check that the condition doesn't reach either load.  If so, folding
+            // this will induce a cycle into the DAG.
+            if ((!LLD->hasAnyUseOfValue(1) ||
+                 (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+                  !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) &&
+                (!RLD->hasAnyUseOfValue(1) ||
+                 (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
+                  !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) {
+              Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
+                                 LLD->getBasePtr().getValueType(),
+                                 TheSelect->getOperand(0),
+                                 TheSelect->getOperand(1),
+                                 LLD->getBasePtr(), RLD->getBasePtr(),
+                                 TheSelect->getOperand(4));
+            }
            }
          }
  
@@ -5685,18 +5959,18 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
              Load = DAG.getLoad(TheSelect->getValueType(0),
                                 TheSelect->getDebugLoc(),
                                 LLD->getChain(),
-                               Addr,LLD->getSrcValue(),
-                               LLD->getSrcValueOffset(),
+                               Addr, 0, 0,
                                 LLD->isVolatile(),
+                               LLD->isNonTemporal(),
                                 LLD->getAlignment());
            } else {
              Load = DAG.getExtLoad(LLD->getExtensionType(),
                                    TheSelect->getDebugLoc(),
                                    TheSelect->getValueType(0),
-                                  LLD->getChain(), Addr, LLD->getSrcValue(),
-                                  LLD->getSrcValueOffset(),
+                                  LLD->getChain(), Addr, 0, 0,
                                    LLD->getMemoryVT(),
                                    LLD->isVolatile(),
+                                  LLD->isNonTemporal(),
                                    LLD->getAlignment());
            }
  
@@ -5724,7 +5998,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
    // (x ? y : y) -> y.
    if (N2 == N3) return N2;
    
-  MVT VT = N2.getValueType();
+  EVT VT = N2.getValueType();
    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
    ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
    ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
@@ -5804,7 +6078,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
                              CstOffset);
          return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
                             PseudoSourceValue::getConstantPool(), 0, false,
-                           Alignment);
+                           false, Alignment);
  
        }
      }  
@@ -5816,8 +6090,8 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
        N2.getValueType().isInteger() &&
        (N1C->isNullValue() ||                         // (a < 0) ? b : 0
         (N1C->getAPIntValue() == 1 && N0 == N2))) {   // (a < 1) ? a : 0
-    MVT XType = N0.getValueType();
-    MVT AType = N2.getValueType();
+    EVT XType = N0.getValueType();
+    EVT AType = N2.getValueType();
      if (XType.bitsGE(AType)) {
        // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a
        // single-bit constant.
@@ -5896,7 +6170,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
    // FIXME: Turn all of these into setcc if setcc if setcc is legal
    // otherwise, go ahead with the folds.
    if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
-    MVT XType = N0.getValueType();
+    EVT XType = N0.getValueType();
      if (!LegalOperations ||
          TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) {
        SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC);
@@ -5938,7 +6212,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
    if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) &&
        N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) &&
        N2.getOperand(0) == N1 && N0.getValueType().isInteger()) {
-    MVT XType = N0.getValueType();
+    EVT XType = N0.getValueType();
      SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0,
                                  DAG.getConstant(XType.getSizeInBits()-1,
                                                  getShiftAmountTy()));
@@ -5953,7 +6227,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
    if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT &&
        N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) {
      if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) {
-      MVT XType = N0.getValueType();
+      EVT XType = N0.getValueType();
        if (SubC->isNullValue() && XType.isInteger()) {
          SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
                                      N0,
@@ -5972,11 +6246,11 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
  }
  
  /// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
-SDValue DAGCombiner::SimplifySetCC(MVT VT, SDValue N0,
+SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
                                     SDValue N1, ISD::CondCode Cond,
                                     DebugLoc DL, bool foldBooleans) {
    TargetLowering::DAGCombinerInfo
-    DagCombineInfo(DAG, Level == Unrestricted, false, this);
+    DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
    return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
  }
  
@@ -6008,11 +6282,12 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
    return S;
  }
  
-/// FindBaseOffset - Return true if base is known not to alias with anything
-/// but itself.  Provides base object and offset as results.
-static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset) {
+/// FindBaseOffset - Return true if base is a frame index, which is known not
+// to alias with anything but itself.  Provides base object and offset as results.
+static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
+                           GlobalValue *&GV, void *&CV) {
    // Assume it is a primitive operation.
-  Base = Ptr; Offset = 0;
+  Base = Ptr; Offset = 0; GV = 0; CV = 0;
  
    // If it's an adding a simple constant then integrate the offset.
    if (Base.getOpcode() == ISD::ADD) {
@@ -6021,36 +6296,73 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset) {
        Offset += C->getZExtValue();
      }
    }
+  
+  // Return the underlying GlobalValue, and update the Offset.  Return false
+  // for GlobalAddressSDNode since the same GlobalAddress may be represented
+  // by multiple nodes with different offsets.
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
+    GV = G->getGlobal();
+    Offset += G->getOffset();
+    return false;
+  }
  
+  // Return the underlying Constant value, and update the Offset.  Return false
+  // for ConstantSDNodes since the same constant pool entry may be represented
+  // by multiple nodes with different offsets.
+  if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
+    CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal()
+                                         : (void *)C->getConstVal();
+    Offset += C->getOffset();
+    return false;
+  }
    // If it's any of the following then it can't alias with anything but itself.
-  return isa<FrameIndexSDNode>(Base) ||
-         isa<ConstantPoolSDNode>(Base) ||
-         isa<GlobalAddressSDNode>(Base);
+  return isa<FrameIndexSDNode>(Base);
  }
  
  /// isAlias - Return true if there is any possibility that the two addresses
  /// overlap.
  bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
                            const Value *SrcValue1, int SrcValueOffset1,
+                          unsigned SrcValueAlign1,
                            SDValue Ptr2, int64_t Size2,
-                          const Value *SrcValue2, int SrcValueOffset2) const {
+                          const Value *SrcValue2, int SrcValueOffset2,
+                          unsigned SrcValueAlign2) const {
    // If they are the same then they must be aliases.
    if (Ptr1 == Ptr2) return true;
  
    // Gather base node and offset information.
    SDValue Base1, Base2;
    int64_t Offset1, Offset2;
-  bool KnownBase1 = FindBaseOffset(Ptr1, Base1, Offset1);
-  bool KnownBase2 = FindBaseOffset(Ptr2, Base2, Offset2);
+  GlobalValue *GV1, *GV2;
+  void *CV1, *CV2;
+  bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
+  bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
  
-  // If they have a same base address then...
-  if (Base1 == Base2)
-    // Check to see if the addresses overlap.
+  // If they have a same base address then check to see if they overlap.
+  if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
      return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
  
-  // If we know both bases then they can't alias.
-  if (KnownBase1 && KnownBase2) return false;
+  // If we know what the bases are, and they aren't identical, then we know they
+  // cannot alias.
+  if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
+    return false;
  
+  // If we know required SrcValue1 and SrcValue2 have relatively large alignment
+  // compared to the size and offset of the access, we may be able to prove they
+  // do not alias.  This check is conservative for now to catch cases created by
+  // splitting vector types.
+  if ((SrcValueAlign1 == SrcValueAlign2) &&
+      (SrcValueOffset1 != SrcValueOffset2) &&
+      (Size1 == Size2) && (SrcValueAlign1 > Size1)) {
+    int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
+    int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
+    
+    // There is no overlap between these relatively aligned accesses of similar
+    // size, return no alias.
+    if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
+      return false;
+  }
+  
    if (CombinerGlobalAA) {
      // Use alias analysis information.
      int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
@@ -6070,20 +6382,24 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
  /// node.  Returns true if the operand was a load.
  bool DAGCombiner::FindAliasInfo(SDNode *N,
                          SDValue &Ptr, int64_t &Size,
-                        const Value *&SrcValue, int &SrcValueOffset) const {
+                        const Value *&SrcValue, 
+                        int &SrcValueOffset,
+                        unsigned &SrcValueAlign) const {
    if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
      Ptr = LD->getBasePtr();
      Size = LD->getMemoryVT().getSizeInBits() >> 3;
      SrcValue = LD->getSrcValue();
      SrcValueOffset = LD->getSrcValueOffset();
+    SrcValueAlign = LD->getOriginalAlignment();
      return true;
    } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
      Ptr = ST->getBasePtr();
      Size = ST->getMemoryVT().getSizeInBits() >> 3;
      SrcValue = ST->getSrcValue();
      SrcValueOffset = ST->getSrcValueOffset();
+    SrcValueAlign = ST->getOriginalAlignment();
    } else {
-    assert(0 && "FindAliasInfo expected a memory operand");
+    llvm_unreachable("FindAliasInfo expected a memory operand");
    }
  
    return false;
@@ -6094,28 +6410,45 @@ bool DAGCombiner::FindAliasInfo(SDNode *N,
  void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                     SmallVector<SDValue, 8> &Aliases) {
    SmallVector<SDValue, 8> Chains;     // List of chains to visit.
-  std::set<SDNode *> Visited;           // Visited node set.
+  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
  
    // Get alias information for node.
    SDValue Ptr;
-  int64_t Size = 0;
-  const Value *SrcValue = 0;
-  int SrcValueOffset = 0;
-  bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset);
+  int64_t Size;
+  const Value *SrcValue;
+  int SrcValueOffset;
+  unsigned SrcValueAlign;
+  bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset, 
+                              SrcValueAlign);
  
    // Starting off.
    Chains.push_back(OriginalChain);
-
+  unsigned Depth = 0;
+  
    // Look at each chain and determine if it is an alias.  If so, add it to the
    // aliases list.  If not, then continue up the chain looking for the next
    // candidate.
    while (!Chains.empty()) {
      SDValue Chain = Chains.back();
      Chains.pop_back();
+    
+    // For TokenFactor nodes, look at each operand and only continue up the 
+    // chain until we find two aliases.  If we've seen two aliases, assume we'll 
+    // find more and revert to original chain since the xform is unlikely to be
+    // profitable.
+    // 
+    // FIXME: The depth check could be made to return the last non-aliasing 
+    // chain we found before we hit a tokenfactor rather than the original
+    // chain.
+    if (Depth > 6 || Aliases.size() == 2) {
+      Aliases.clear();
+      Aliases.push_back(OriginalChain);
+      break;
+    }
  
-     // Don't bother if we've been before.
-    if (Visited.find(Chain.getNode()) != Visited.end()) continue;
-    Visited.insert(Chain.getNode());
+    // Don't bother if we've been before.
+    if (!Visited.insert(Chain.getNode()))
+      continue;
  
      switch (Chain.getOpcode()) {
      case ISD::EntryToken:
@@ -6126,35 +6459,40 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
      case ISD::STORE: {
        // Get alias information for Chain.
        SDValue OpPtr;
-      int64_t OpSize = 0;
-      const Value *OpSrcValue = 0;
-      int OpSrcValueOffset = 0;
+      int64_t OpSize;
+      const Value *OpSrcValue;
+      int OpSrcValueOffset;
+      unsigned OpSrcValueAlign;
        bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
-                                    OpSrcValue, OpSrcValueOffset);
+                                    OpSrcValue, OpSrcValueOffset,
+                                    OpSrcValueAlign);
  
        // If chain is alias then stop here.
        if (!(IsLoad && IsOpLoad) &&
-          isAlias(Ptr, Size, SrcValue, SrcValueOffset,
-                  OpPtr, OpSize, OpSrcValue, OpSrcValueOffset)) {
+          isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
+                  OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
+                  OpSrcValueAlign)) {
          Aliases.push_back(Chain);
        } else {
          // Look further up the chain.
          Chains.push_back(Chain.getOperand(0));
-        // Clean up old chain.
-        AddToWorkList(Chain.getNode());
+        ++Depth;
        }
        break;
      }
  
      case ISD::TokenFactor:
-      // We have to check each of the operands of the token factor, so we queue
-      // then up.  Adding the  operands to the queue (stack) in reverse order
-      // maintains the original order and increases the likelihood that getNode
-      // will find a matching token factor (CSE.)
+      // We have to check each of the operands of the token factor for "small"
+      // token factors, so we queue them up.  Adding the operands to the queue
+      // (stack) in reverse order maintains the original order and increases the
+      // likelihood that getNode will find a matching token factor (CSE.)
+      if (Chain.getNumOperands() > 16) {
+        Aliases.push_back(Chain);
+        break;
+      }
        for (unsigned n = Chain.getNumOperands(); n;)
          Chains.push_back(Chain.getOperand(--n));
-      // Eliminate the token factor if we can.
-      AddToWorkList(Chain.getNode());
+      ++Depth;
        break;
  
      default:
@@ -6180,15 +6518,10 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
      // If a single operand then chain to it.  We don't need to revisit it.
      return Aliases[0];
    }
-
+  
    // Construct a custom tailored token factor.
-  SDValue NewChain = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
-                                 &Aliases[0], Aliases.size());
-
-  // Make sure the old chain gets cleaned up.
-  if (NewChain != OldChain) AddToWorkList(OldChain.getNode());
-
-  return NewChain;
+  return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, 
+                     &Aliases[0], Aliases.size());
  }
  
  // SelectionDAG::Combine - This is the entry point for the file.