Remove unused but set variables.
[oota-llvm.git] / lib / CodeGen / SelectionDAG / LegalizeDAG.cpp
index 661a7ad6d877ce1da8bcd588503d2baceadb8c9a..4b6d3ef547388ab95255587e200c9c2b76cfaed5 100644 (file)
 #include "llvm/Analysis/DebugInfo.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
 #include "llvm/CallingConv.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
@@ -57,19 +50,13 @@ class SelectionDAGLegalize {
   const TargetMachine &TM;
   const TargetLowering &TLI;
   SelectionDAG &DAG;
-  CodeGenOpt::Level OptLevel;
 
   // Libcall insertion helpers.
 
-  /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
+  /// LastCALLSEQ - This keeps track of the CALLSEQ_END node that has been
   /// legalized.  We use this to ensure that calls are properly serialized
   /// against each other, including inserted libcalls.
-  SDValue LastCALLSEQ_END;
-
-  /// IsLegalizingCall - This member is used *only* for purposes of providing
-  /// helpful assertions that a libcall isn't created while another call is
-  /// being legalized (which could lead to non-serialized call sequences).
-  bool IsLegalizingCall;
+  SmallVector<SDValue, 8> LastCALLSEQ;
 
   enum LegalizeAction {
     Legal,      // The target natively supports this operation.
@@ -92,16 +79,19 @@ class SelectionDAGLegalize {
     // If someone requests legalization of the new node, return itself.
     if (From != To)
       LegalizedNodes.insert(std::make_pair(To, To));
+
+    // Transfer SDDbgValues.
+    DAG.TransferDbgValues(From, To);
   }
 
 public:
-  SelectionDAGLegalize(SelectionDAG &DAG, CodeGenOpt::Level ol);
+  explicit SelectionDAGLegalize(SelectionDAG &DAG);
 
   /// getTypeAction - Return how we should legalize values of this type, either
   /// it is already legal or we need to expand it into multiple registers of
   /// smaller integer type, or we need to promote it to a larger type.
   LegalizeAction getTypeAction(EVT VT) const {
-    return (LegalizeAction)ValueTypeActions.getTypeAction(VT);
+    return (LegalizeAction)TLI.getTypeAction(*DAG.getContext(), VT);
   }
 
   /// isTypeLegal - Return true if this type is legal on this target.
@@ -144,6 +134,9 @@ private:
                              DebugLoc dl);
 
   SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+  SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops,
+                        unsigned NumOps, bool isSigned, DebugLoc dl);
+
   std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
                                                  SDNode *Node, bool isSigned);
   SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
@@ -155,6 +148,7 @@ private:
                            RTLIB::Libcall Call_I32,
                            RTLIB::Libcall Call_I64,
                            RTLIB::Libcall Call_I128);
+  void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 
   SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
   SDValue ExpandBUILD_VECTOR(SDNode *Node);
@@ -173,12 +167,22 @@ private:
   SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);
 
   SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+  SDValue ExpandInsertToVectorThroughStack(SDValue Op);
   SDValue ExpandVectorBuildThroughStack(SDNode* Node);
 
   std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
 
   void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
   void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+
+  SDValue getLastCALLSEQ() { return LastCALLSEQ.back();  }
+  void setLastCALLSEQ(const SDValue s) { LastCALLSEQ.back() = s; }
+  void pushLastCALLSEQ(SDValue s) {
+    LastCALLSEQ.push_back(s);
+  }
+  void popLastCALLSEQ() {
+    LastCALLSEQ.pop_back();
+  }
 };
 }
 
@@ -214,18 +218,16 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT,  DebugLoc dl,
   return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]);
 }
 
-SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
-                                           CodeGenOpt::Level ol)
+SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
   : TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
-    DAG(dag), OptLevel(ol),
+    DAG(dag),
     ValueTypeActions(TLI.getValueTypeActions()) {
   assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
          "Too many value types for ValueTypeActions to hold!");
 }
 
 void SelectionDAGLegalize::LegalizeDAG() {
-  LastCALLSEQ_END = DAG.getEntryNode();
-  IsLegalizingCall = false;
+  pushLastCALLSEQ(DAG.getEntryNode());
 
   // The legalize process is inherently a bottom-up recursive process (users
   // legalize their uses before themselves).  Given infinite stack space, we
@@ -253,10 +255,16 @@ void SelectionDAGLegalize::LegalizeDAG() {
 /// FindCallEndFromCallStart - Given a chained node that is part of a call
 /// sequence, find the CALLSEQ_END node that terminates the call sequence.
 static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) {
+  int next_depth = depth;
   if (Node->getOpcode() == ISD::CALLSEQ_START)
-    depth++;
-  if ((Node->getOpcode() == ISD::CALLSEQ_END) && (depth == 1))
-    return Node;
+    next_depth = depth + 1;
+  if (Node->getOpcode() == ISD::CALLSEQ_END) {
+    assert(depth > 0 && "negative depth!");
+    if (depth == 1)
+      return Node;
+    else
+      next_depth = depth - 1;
+  }
   if (Node->use_empty())
     return 0;   // No CallSeqEnd
 
@@ -286,7 +294,7 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) {
     SDNode *User = *UI;
     for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
       if (User->getOperand(i) == TheChain)
-        if (SDNode *Result = FindCallEndFromCallStart(User, depth))
+        if (SDNode *Result = FindCallEndFromCallStart(User, next_depth))
           return Result;
   }
   return 0;
@@ -307,6 +315,7 @@ static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
     case ISD::CALLSEQ_START:
       if (!nested)
         return Node;
+      Node = Node->getOperand(0).getNode();
       nested--;
       break;
     case ISD::CALLSEQ_END:
@@ -314,7 +323,7 @@ static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
       break;
     }
   }
-  return 0;
+  return (Node->getOpcode() == ISD::CALLSEQ_START) ? Node : 0;
 }
 
 /// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to
@@ -394,7 +403,7 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
   SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
   unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
   if (Extend)
-    return DAG.getExtLoad(ISD::EXTLOAD, OrigVT, dl,
+    return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
                           DAG.getEntryNode(),
                           CPIdx, MachinePointerInfo::getConstantPool(),
                           VT, false, false, Alignment);
@@ -423,68 +432,67 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
       SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
       return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                           ST->isVolatile(), ST->isNonTemporal(), Alignment);
-    } else {
-      // Do a (aligned) store to a stack slot, then copy from the stack slot
-      // to the final destination using (unaligned) integer loads and stores.
-      EVT StoredVT = ST->getMemoryVT();
-      EVT RegVT =
-        TLI.getRegisterType(*DAG.getContext(),
-                            EVT::getIntegerVT(*DAG.getContext(),
-                                              StoredVT.getSizeInBits()));
-      unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
-      unsigned RegBytes = RegVT.getSizeInBits() / 8;
-      unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
-
-      // Make sure the stack slot is also aligned for the register type.
-      SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
-
-      // Perform the original store, only redirected to the stack slot.
-      SDValue Store = DAG.getTruncStore(Chain, dl,
-                                        Val, StackPtr, MachinePointerInfo(),
-                                        StoredVT, false, false, 0);
-      SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
-      SmallVector<SDValue, 8> Stores;
-      unsigned Offset = 0;
-
-      // Do all but one copies using the full register width.
-      for (unsigned i = 1; i < NumRegs; i++) {
-        // Load one integer register's worth from the stack slot.
-        SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr,
-                                   MachinePointerInfo(),
-                                   false, false, 0);
-        // Store it to the final location.  Remember the store.
-        Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
-                                    ST->getPointerInfo().getWithOffset(Offset),
-                                      ST->isVolatile(), ST->isNonTemporal(),
-                                      MinAlign(ST->getAlignment(), Offset)));
-        // Increment the pointers.
-        Offset += RegBytes;
-        StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
-                               Increment);
-        Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
-      }
+    }
+    // Do a (aligned) store to a stack slot, then copy from the stack slot
+    // to the final destination using (unaligned) integer loads and stores.
+    EVT StoredVT = ST->getMemoryVT();
+    EVT RegVT =
+      TLI.getRegisterType(*DAG.getContext(),
+                          EVT::getIntegerVT(*DAG.getContext(),
+                                            StoredVT.getSizeInBits()));
+    unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
+    unsigned RegBytes = RegVT.getSizeInBits() / 8;
+    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
 
-      // The last store may be partial.  Do a truncating store.  On big-endian
-      // machines this requires an extending load from the stack slot to ensure
-      // that the bits are in the right place.
-      EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
-                                    8 * (StoredBytes - Offset));
+    // Make sure the stack slot is also aligned for the register type.
+    SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
 
-      // Load from the stack slot.
-      SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Store, StackPtr,
-                                    MachinePointerInfo(),
-                                    MemVT, false, false, 0);
+    // Perform the original store, only redirected to the stack slot.
+    SDValue Store = DAG.getTruncStore(Chain, dl,
+                                      Val, StackPtr, MachinePointerInfo(),
+                                      StoredVT, false, false, 0);
+    SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+    SmallVector<SDValue, 8> Stores;
+    unsigned Offset = 0;
 
-      Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
-                                         ST->getPointerInfo()
-                                           .getWithOffset(Offset),
-                                         MemVT, ST->isVolatile(),
-                                         ST->isNonTemporal(),
-                                         MinAlign(ST->getAlignment(), Offset)));
-      // The order of the stores doesn't matter - say it with a TokenFactor.
-      return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
-                         Stores.size());
+    // Do all but one copies using the full register width.
+    for (unsigned i = 1; i < NumRegs; i++) {
+      // Load one integer register's worth from the stack slot.
+      SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr,
+                                 MachinePointerInfo(),
+                                 false, false, 0);
+      // Store it to the final location.  Remember the store.
+      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
+                                  ST->getPointerInfo().getWithOffset(Offset),
+                                    ST->isVolatile(), ST->isNonTemporal(),
+                                    MinAlign(ST->getAlignment(), Offset)));
+      // Increment the pointers.
+      Offset += RegBytes;
+      StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+                             Increment);
+      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
     }
+
+    // The last store may be partial.  Do a truncating store.  On big-endian
+    // machines this requires an extending load from the stack slot to ensure
+    // that the bits are in the right place.
+    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+                                  8 * (StoredBytes - Offset));
+
+    // Load from the stack slot.
+    SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+                                  MachinePointerInfo(),
+                                  MemVT, false, false, 0);
+
+    Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
+                                       ST->getPointerInfo()
+                                         .getWithOffset(Offset),
+                                       MemVT, ST->isVolatile(),
+                                       ST->isNonTemporal(),
+                                       MinAlign(ST->getAlignment(), Offset)));
+    // The order of the stores doesn't matter - say it with a TokenFactor.
+    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+                       Stores.size());
   }
   assert(ST->getMemoryVT().isInteger() &&
          !ST->getMemoryVT().isVector() &&
@@ -495,7 +503,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
   int IncrementSize = NumBits / 8;
 
   // Divide the stored value in two parts.
-  SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy());
+  SDValue ShiftAmount = DAG.getConstant(NumBits,
+                                      TLI.getShiftAmountTy(Val.getValueType()));
   SDValue Lo = Val;
   SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
 
@@ -575,7 +584,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
     // The last copy may be partial.  Do an extending load.
     EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                   8 * (LoadedBytes - Offset));
-    SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Chain, Ptr,
+    SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                                   LD->getPointerInfo().getWithOffset(Offset),
                                   MemVT, LD->isVolatile(),
                                   LD->isNonTemporal(),
@@ -592,7 +601,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
                              Stores.size());
 
     // Finally, perform the original load only redirected to the stack slot.
-    Load = DAG.getExtLoad(LD->getExtensionType(), VT, dl, TF, StackBase,
+    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                           MachinePointerInfo(), LoadedVT, false, false, 0);
 
     // Callers expect a MERGE_VALUES node.
@@ -620,29 +629,30 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
   // Load the value in two parts
   SDValue Lo, Hi;
   if (TLI.isLittleEndian()) {
-    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getPointerInfo(),
+    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                         NewLoadedVT, LD->isVolatile(),
                         LD->isNonTemporal(), Alignment);
     Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                       DAG.getConstant(IncrementSize, TLI.getPointerTy()));
-    Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr,
+    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                         LD->getPointerInfo().getWithOffset(IncrementSize),
                         NewLoadedVT, LD->isVolatile(),
                         LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
   } else {
-    Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getPointerInfo(),
+    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                         NewLoadedVT, LD->isVolatile(),
                         LD->isNonTemporal(), Alignment);
     Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
                       DAG.getConstant(IncrementSize, TLI.getPointerTy()));
-    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr,
+    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                         LD->getPointerInfo().getWithOffset(IncrementSize),
                         NewLoadedVT, LD->isVolatile(),
                         LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
   }
 
   // aggregate the two parts
-  SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy());
+  SDValue ShiftAmount = DAG.getConstant(NumBits,
+                                       TLI.getShiftAmountTy(Hi.getValueType()));
   SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
   Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
 
@@ -781,7 +791,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
       }
     }
   }
-  return SDValue();
+  return SDValue(0, 0);
 }
 
 /// LegalizeOp - We know that the specified value has a legal type, and
@@ -814,7 +824,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
   bool isCustom = false;
 
   // Figure out the correct action; the way to query this varies by opcode
-  TargetLowering::LegalizeAction Action;
+  TargetLowering::LegalizeAction Action = TargetLowering::Legal;
   bool SimpleFinishLegalizing = true;
   switch (Node->getOpcode()) {
   case ISD::INTRINSIC_W_CHAIN:
@@ -929,11 +939,12 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
     case ISD::BR_JT:
     case ISD::BR_CC:
     case ISD::BRCOND:
-      // Branches tweak the chain to include LastCALLSEQ_END
+      assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?");
+      // Branches tweak the chain to include LastCALLSEQ
       Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0],
-                            LastCALLSEQ_END);
+                           getLastCALLSEQ());
       Ops[0] = LegalizeOp(Ops[0]);
-      LastCALLSEQ_END = DAG.getEntryNode();
+      setLastCALLSEQ(DAG.getEntryNode());
       break;
     case ISD::SHL:
     case ISD::SRL:
@@ -943,7 +954,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
       // Legalizing shifts/rotates requires adjusting the shift amount
       // to the appropriate width.
       if (!Ops[1].getValueType().isVector())
-        Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[1]));
+        Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(),
+                                                      Ops[1]));
       break;
     case ISD::SRL_PARTS:
     case ISD::SRA_PARTS:
@@ -951,7 +963,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
       // Legalizing shifts/rotates requires adjusting the shift amount
       // to the appropriate width.
       if (!Ops[2].getValueType().isVector())
-        Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[2]));
+        Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(),
+                                                      Ops[2]));
       break;
     }
 
@@ -1020,6 +1033,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
     break;
   case ISD::CALLSEQ_START: {
     SDNode *CallEnd = FindCallEndFromCallStart(Node);
+    assert(CallEnd && "didn't find CALLSEQ_END!");
 
     // Recursively Legalize all of the inputs of the call end that do not lead
     // to this call start.  This ensures that any libcalls that need be inserted
@@ -1036,9 +1050,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
 
     // Merge in the last call to ensure that this call starts after the last
     // call ended.
-    if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
+    if (getLastCALLSEQ().getOpcode() != ISD::EntryToken) {
       Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                         Tmp1, LastCALLSEQ_END);
+                         Tmp1, getLastCALLSEQ());
       Tmp1 = LegalizeOp(Tmp1);
     }
 
@@ -1059,25 +1073,29 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
     // sequence have been legalized, legalize the call itself.  During this
     // process, no libcalls can/will be inserted, guaranteeing that no calls
     // can overlap.
-    assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!");
     // Note that we are selecting this call!
-    LastCALLSEQ_END = SDValue(CallEnd, 0);
-    IsLegalizingCall = true;
+    setLastCALLSEQ(SDValue(CallEnd, 0));
 
     // Legalize the call, starting from the CALLSEQ_END.
-    LegalizeOp(LastCALLSEQ_END);
-    assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!");
+    LegalizeOp(getLastCALLSEQ());
     return Result;
   }
   case ISD::CALLSEQ_END:
-    // If the CALLSEQ_START node hasn't been legalized first, legalize it.  This
-    // will cause this node to be legalized as well as handling libcalls right.
-    if (LastCALLSEQ_END.getNode() != Node) {
-      LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0));
-      DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
-      assert(I != LegalizedNodes.end() &&
-             "Legalizing the call start should have legalized this node!");
-      return I->second;
+    {
+      SDNode *myCALLSEQ_BEGIN = FindCallStartFromCallEnd(Node);
+
+      // If the CALLSEQ_START node hasn't been legalized first, legalize it.
+      // This will cause this node to be legalized as well as handling libcalls
+      // right.
+      if (getLastCALLSEQ().getNode() != Node) {
+        LegalizeOp(SDValue(myCALLSEQ_BEGIN, 0));
+        DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+        assert(I != LegalizedNodes.end() &&
+               "Legalizing the call start should have legalized this node!");
+        return I->second;
+      }
+
+      pushLastCALLSEQ(SDValue(myCALLSEQ_BEGIN, 0));
     }
 
     // Otherwise, the call start has been legalized and everything is going
@@ -1105,9 +1123,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
                          Result.getResNo());
       }
     }
-    assert(IsLegalizingCall && "Call sequence imbalance between start/end?");
     // This finishes up call legalization.
-    IsLegalizingCall = false;
+    popLastCALLSEQ();
 
     // If the CALLSEQ_END node has a flag, remember that we legalized it.
     AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));
@@ -1202,7 +1219,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
       ISD::LoadExtType NewExtType =
         ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
 
-      Result = DAG.getExtLoad(NewExtType, Node->getValueType(0), dl,
+      Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
                               Tmp1, Tmp2, LD->getPointerInfo(),
                               NVT, isVolatile, isNonTemporal, Alignment);
 
@@ -1238,7 +1255,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
       if (TLI.isLittleEndian()) {
         // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
         // Load the bottom RoundWidth bits.
-        Lo = DAG.getExtLoad(ISD::ZEXTLOAD, Node->getValueType(0), dl,
+        Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
                             Tmp1, Tmp2,
                             LD->getPointerInfo(), RoundVT, isVolatile,
                             isNonTemporal, Alignment);
@@ -1247,7 +1264,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
         IncrementSize = RoundWidth / 8;
         Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
                            DAG.getIntPtrConstant(IncrementSize));
-        Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2,
+        Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
                             LD->getPointerInfo().getWithOffset(IncrementSize),
                             ExtraVT, isVolatile, isNonTemporal,
                             MinAlign(Alignment, IncrementSize));
@@ -1259,7 +1276,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
 
         // Move the top bits to the right place.
         Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
-                         DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+                         DAG.getConstant(RoundWidth,
+                                      TLI.getShiftAmountTy(Hi.getValueType())));
 
         // Join the hi and lo parts.
         Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
@@ -1267,7 +1285,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
         // Big endian - avoid unaligned loads.
         // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
         // Load the top RoundWidth bits.
-        Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2,
+        Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
                             LD->getPointerInfo(), RoundVT, isVolatile,
                             isNonTemporal, Alignment);
 
@@ -1276,7 +1294,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
         Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
                            DAG.getIntPtrConstant(IncrementSize));
         Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
-                            Node->getValueType(0), dl, Tmp1, Tmp2,
+                            dl, Node->getValueType(0), Tmp1, Tmp2,
                             LD->getPointerInfo().getWithOffset(IncrementSize),
                             ExtraVT, isVolatile, isNonTemporal,
                             MinAlign(Alignment, IncrementSize));
@@ -1288,7 +1306,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
 
         // Move the top bits to the right place.
         Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
-                         DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
+                         DAG.getConstant(ExtraWidth,
+                                      TLI.getShiftAmountTy(Hi.getValueType())));
 
         // Join the hi and lo parts.
         Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
@@ -1355,6 +1374,91 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
           Tmp2 = LegalizeOp(Load.getValue(1));
           break;
         }
+
+        // If this is a promoted vector load, and the vector element types are
+        // legal, then scalarize it.
+        if (ExtType == ISD::EXTLOAD && SrcVT.isVector() &&
+          isTypeLegal(Node->getValueType(0).getScalarType())) {
+          SmallVector<SDValue, 8> LoadVals;
+          SmallVector<SDValue, 8> LoadChains;
+          unsigned NumElem = SrcVT.getVectorNumElements();
+          unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+          for (unsigned Idx=0; Idx<NumElem; Idx++) {
+            Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                                DAG.getIntPtrConstant(Stride));
+            SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
+                  Node->getValueType(0).getScalarType(),
+                  Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
+                  SrcVT.getScalarType(),
+                  LD->isVolatile(), LD->isNonTemporal(),
+                  LD->getAlignment());
+
+            LoadVals.push_back(ScalarLoad.getValue(0));
+            LoadChains.push_back(ScalarLoad.getValue(1));
+          }
+          Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+            &LoadChains[0], LoadChains.size());
+          SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl,
+            Node->getValueType(0), &LoadVals[0], LoadVals.size());
+
+          Tmp1 = LegalizeOp(ValRes);  // Relegalize new nodes.
+          Tmp2 = LegalizeOp(Result.getValue(0));  // Relegalize new nodes.
+          break;
+        }
+
+        // If this is a promoted vector load, and the vector element types are
+        // illegal, create the promoted vector from bitcasted segments.
+        if (ExtType == ISD::EXTLOAD && SrcVT.isVector()) {
+          EVT MemElemTy = Node->getValueType(0).getScalarType();
+          EVT SrcSclrTy = SrcVT.getScalarType();
+          unsigned SizeRatio =
+            (MemElemTy.getSizeInBits() / SrcSclrTy.getSizeInBits());
+
+          SmallVector<SDValue, 8> LoadVals;
+          SmallVector<SDValue, 8> LoadChains;
+          unsigned NumElem = SrcVT.getVectorNumElements();
+          unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+          for (unsigned Idx=0; Idx<NumElem; Idx++) {
+            Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                                DAG.getIntPtrConstant(Stride));
+            SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
+                  SrcVT.getScalarType(),
+                  Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
+                  SrcVT.getScalarType(),
+                  LD->isVolatile(), LD->isNonTemporal(),
+                  LD->getAlignment());
+            if (TLI.isBigEndian()) {
+              // MSB (which is garbage, comes first)
+              LoadVals.push_back(ScalarLoad.getValue(0));
+              for (unsigned i = 0; i<SizeRatio-1; ++i)
+                LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
+            } else {
+              // LSB (which is data, comes first)
+              for (unsigned i = 0; i<SizeRatio-1; ++i)
+                LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
+              LoadVals.push_back(ScalarLoad.getValue(0));
+            }
+            LoadChains.push_back(ScalarLoad.getValue(1));
+          }
+
+          Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+            &LoadChains[0], LoadChains.size());
+          EVT TempWideVector = EVT::getVectorVT(*DAG.getContext(),
+            SrcVT.getScalarType(), NumElem*SizeRatio);
+          SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl, 
+            TempWideVector, &LoadVals[0], LoadVals.size());
+
+          // Cast to the correct type
+          ValRes = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), ValRes);
+
+          Tmp1 = LegalizeOp(ValRes);  // Relegalize new nodes.
+          Tmp2 = LegalizeOp(Result.getValue(0));  // Relegalize new nodes.
+          break;
+
+        }
+
         // FIXME: This does not work for vectors on most targets.  Sign- and
         // zero-extend operations are currently folded into extending loads,
         // whether they are legal or not, and then we end up here without any
@@ -1363,7 +1467,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
                "EXTLOAD should always be supported!");
         // Turn the unsupported load into an EXTLOAD followed by an explicit
         // zero/sign extend inreg.
-        Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0), dl,
+        Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
                                 Tmp1, Tmp2, LD->getPointerInfo(), SrcVT,
                                 LD->isVolatile(), LD->isNonTemporal(),
                                 LD->getAlignment());
@@ -1373,7 +1477,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
                                Result.getValueType(),
                                Result, DAG.getValueType(SrcVT));
         else
-          ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT);
+          ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
         Tmp1 = LegalizeOp(ValRes);  // Relegalize new nodes.
         Tmp2 = LegalizeOp(Result.getValue(1));  // Relegalize new nodes.
         break;
@@ -1477,7 +1581,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
           Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
                              DAG.getIntPtrConstant(IncrementSize));
           Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
-                           DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+                           DAG.getConstant(RoundWidth,
+                                    TLI.getShiftAmountTy(Tmp3.getValueType())));
           Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2,
                              ST->getPointerInfo().getWithOffset(IncrementSize),
                                  ExtraVT, isVolatile, isNonTemporal,
@@ -1487,7 +1592,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
           // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
           // Store the top RoundWidth bits.
           Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
-                           DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
+                           DAG.getConstant(ExtraWidth,
+                                    TLI.getShiftAmountTy(Tmp3.getValueType())));
           Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo(),
                                  RoundVT, isVolatile, isNonTemporal, Alignment);
 
@@ -1528,6 +1634,88 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
           Result = TLI.LowerOperation(Result, DAG);
           break;
         case Expand:
+
+          EVT WideScalarVT = Tmp3.getValueType().getScalarType();
+          EVT NarrowScalarVT = StVT.getScalarType();
+
+          // The Store type is illegal, must scalarize the vector store.
+          SmallVector<SDValue, 8> Stores;
+          bool ScalarLegal = isTypeLegal(WideScalarVT);
+          if (!isTypeLegal(StVT) && StVT.isVector() && ScalarLegal) {
+            unsigned NumElem = StVT.getVectorNumElements();
+
+            unsigned ScalarSize = StVT.getScalarType().getSizeInBits();
+            // Round odd types to the next pow of two.
+            if (!isPowerOf2_32(ScalarSize))
+              ScalarSize = NextPowerOf2(ScalarSize);
+            // Types smaller than 8 bits are promoted to 8 bits.
+            ScalarSize = std::max<unsigned>(ScalarSize, 8);
+            // Store stride
+            unsigned Stride = ScalarSize/8;
+            assert(isPowerOf2_32(Stride) && "Stride must be a power of two");
+
+            for (unsigned Idx=0; Idx<NumElem; Idx++) {
+              SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                                       WideScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
+
+
+              EVT NVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize);
+
+              Ex = DAG.getNode(ISD::TRUNCATE, dl, NVT, Ex);
+              Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                                 DAG.getIntPtrConstant(Stride));
+              SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
+                                           ST->getPointerInfo().getWithOffset(Idx*Stride),
+                                           isVolatile, isNonTemporal, Alignment);
+              Stores.push_back(Store);
+            }
+            Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                                 &Stores[0], Stores.size());
+            break;
+          }
+
+          // The Store type is illegal, must scalarize the vector store.
+          // However, the scalar type is illegal. Must bitcast the result
+          // and store it in smaller parts.
+          if (!isTypeLegal(StVT) && StVT.isVector()) {
+            unsigned WideNumElem = StVT.getVectorNumElements();
+            unsigned Stride = NarrowScalarVT.getSizeInBits()/8;
+
+            unsigned SizeRatio =
+              (WideScalarVT.getSizeInBits() / NarrowScalarVT.getSizeInBits());
+
+            EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(), NarrowScalarVT,
+                                               SizeRatio*WideNumElem);
+
+            // Cast the wide elem vector to wider vec with smaller elem type.
+            // Example <2 x i64> -> <4 x i32>
+            Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3);
+
+            for (unsigned Idx=0; Idx<WideNumElem*SizeRatio; Idx++) {
+              // Extract elment i
+              SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                                       NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
+              // bump pointer.
+              Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                                 DAG.getIntPtrConstant(Stride));
+
+              // Store if, this element is:
+              //  - First element on big endian, or
+              //  - Last element on little endian
+              if (( TLI.isBigEndian() && (Idx%SizeRatio == 0)) ||
+                  ((!TLI.isBigEndian() && (Idx%SizeRatio == SizeRatio-1)))) {
+                SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
+                                             ST->getPointerInfo().getWithOffset(Idx*Stride),
+                                             isVolatile, isNonTemporal, Alignment);
+                Stores.push_back(Store);
+              }
+            }
+            Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                                 &Stores[0], Stores.size());
+            break;
+          }
+
+
           // TRUNCSTORE:i16 i32 -> STORE i16
           assert(isTypeLegal(StVT) && "Do not know how to expand this store!");
           Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
@@ -1578,12 +1766,56 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
   if (Op.getValueType().isVector())
     return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(),
                        false, false, 0);
-  return DAG.getExtLoad(ISD::EXTLOAD, Op.getValueType(), dl, Ch, StackPtr,
+  return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
                         MachinePointerInfo(),
                         Vec.getValueType().getVectorElementType(),
                         false, false, 0);
 }
 
+SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
+  assert(Op.getValueType().isVector() && "Non-vector insert subvector!");
+
+  SDValue Vec  = Op.getOperand(0);
+  SDValue Part = Op.getOperand(1);
+  SDValue Idx  = Op.getOperand(2);
+  DebugLoc dl  = Op.getDebugLoc();
+
+  // Store the value to a temporary stack slot, then LOAD the returned part.
+
+  SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+  int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
+
+  // First store the whole vector.
+  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+                            false, false, 0);
+
+  // Then store the inserted part.
+
+  // Add the offset to the index.
+  unsigned EltSize =
+      Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+
+  Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+                    DAG.getConstant(EltSize, Idx.getValueType()));
+
+  if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
+    Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
+  else
+    Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+  SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+                                    StackPtr);
+
+  // Store the subvector.
+  Ch = DAG.getStore(DAG.getEntryNode(), dl, Part, SubStackPtr,
+                    MachinePointerInfo(), false, false, 0);
+
+  // Finally, load the updated vector.
+  return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo,
+                     false, false, 0);
+}
+
 SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
   // We can't handle this case efficiently.  Allocate a sufficiently
   // aligned object on the stack, store each element into it, then load
@@ -1678,7 +1910,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
       assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?");
       if (BitShift)
         SignBit = DAG.getNode(ISD::SHL, dl, LoadTy, SignBit,
-                              DAG.getConstant(BitShift,TLI.getShiftAmountTy()));
+                              DAG.getConstant(BitShift,
+                                 TLI.getShiftAmountTy(SignBit.getValueType())));
     }
   }
   // Now get the sign bit proper, by seeing whether the value is negative.
@@ -1714,7 +1947,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
   SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
   Chain = SP.getValue(1);
   unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
-  unsigned StackAlign = TM.getFrameInfo()->getStackAlignment();
+  unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
   if (Align > StackAlign)
     SP = DAG.getNode(ISD::AND, dl, VT, SP,
                       DAG.getConstant(-(uint64_t)Align, VT));
@@ -1815,7 +2048,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
                        false, false, DestAlign);
 
   assert(SlotSize < DestSize && "Unknown extension!");
-  return DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, Store, FIPtr,
+  return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr,
                         PtrInfo, SlotVT, false, false, DestAlign);
 }
 
@@ -1944,7 +2177,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
 // and leave the Hi part unset.
 SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
                                             bool isSigned) {
-  assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
   // The input chain to this libcall is the entry node of the function.
   // Legalizing the call will automatically add the previous call to the
   // dependence.
@@ -1979,10 +2211,44 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
     // It's a tailcall, return the chain (which is the DAG root).
     return DAG.getRoot();
 
+  // Legalize the call sequence, starting with the chain.  This will advance
+  // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
+  // was added by LowerCallTo (guaranteeing proper serialization of calls).
+  LegalizeOp(CallInfo.second);
+  return CallInfo.first;
+}
+
+/// ExpandLibCall - Generate a libcall taking the given operands as arguments
+/// and returning a result of type RetVT.
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
+                                            const SDValue *Ops, unsigned NumOps,
+                                            bool isSigned, DebugLoc dl) {
+  TargetLowering::ArgListTy Args;
+  Args.reserve(NumOps);
+
+  TargetLowering::ArgListEntry Entry;
+  for (unsigned i = 0; i != NumOps; ++i) {
+    Entry.Node = Ops[i];
+    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+    Entry.isSExt = isSigned;
+    Entry.isZExt = !isSigned;
+    Args.push_back(Entry);
+  }
+  SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+                                         TLI.getPointerTy());
+
+  const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+  std::pair<SDValue,SDValue> CallInfo =
+  TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+                  false, 0, TLI.getLibcallCallingConv(LC), false,
+                  /*isReturnValueUsed=*/true,
+                  Callee, Args, DAG, dl);
+
   // Legalize the call sequence, starting with the chain.  This will advance
   // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
   // was added by LowerCallTo (guaranteeing proper serialization of calls).
   LegalizeOp(CallInfo.second);
+
   return CallInfo.first;
 }
 
@@ -1992,7 +2258,6 @@ std::pair<SDValue, SDValue>
 SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
                                          SDNode *Node,
                                          bool isSigned) {
-  assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
   SDValue InChain = Node->getOperand(0);
 
   TargetLowering::ArgListTy Args;
@@ -2018,7 +2283,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
                     Callee, Args, DAG, Node->getDebugLoc());
 
   // Legalize the call sequence, starting with the chain.  This will advance
-  // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+  // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
   // was added by LowerCallTo (guaranteeing proper serialization of calls).
   LegalizeOp(CallInfo.second);
   return CallInfo;
@@ -2058,6 +2323,113 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
   return ExpandLibCall(LC, Node, isSigned);
 }
 
+/// isDivRemLibcallAvailable - Return true if divmod libcall is available.
+static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
+                                     const TargetLowering &TLI) {
+  RTLIB::Libcall LC;
+  switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+  default: assert(0 && "Unexpected request for libcall!");
+  case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
+  case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+  case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+  case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+  }
+
+  return TLI.getLibcallName(LC) != 0;
+}
+
+/// UseDivRem - Only issue divrem libcall if both quotient and remainder are
+/// needed.
+static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) {
+  unsigned OtherOpcode = 0;
+  if (isSigned)
+    OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV;
+  else
+    OtherOpcode = isDIV ? ISD::UREM : ISD::UDIV;
+
+  SDValue Op0 = Node->getOperand(0);
+  SDValue Op1 = Node->getOperand(1);
+  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
+         UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+    SDNode *User = *UI;
+    if (User == Node)
+      continue;
+    if (User->getOpcode() == OtherOpcode &&
+        User->getOperand(0) == Op0 &&
+        User->getOperand(1) == Op1)
+      return true;
+  }
+  return false;
+}
+
+/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem
+/// pairs.
+void
+SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
+                                          SmallVectorImpl<SDValue> &Results) {
+  unsigned Opcode = Node->getOpcode();
+  bool isSigned = Opcode == ISD::SDIVREM;
+
+  RTLIB::Libcall LC;
+  switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+  default: assert(0 && "Unexpected request for libcall!");
+  case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
+  case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
+  case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
+  case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
+  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
+  }
+
+  // The input chain to this libcall is the entry node of the function.
+  // Legalizing the call will automatically add the previous call to the
+  // dependence.
+  SDValue InChain = DAG.getEntryNode();
+
+  EVT RetVT = Node->getValueType(0);
+  const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+    EVT ArgVT = Node->getOperand(i).getValueType();
+    const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+    Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
+    Entry.isSExt = isSigned;
+    Entry.isZExt = !isSigned;
+    Args.push_back(Entry);
+  }
+
+  // Also pass the return address of the remainder.
+  SDValue FIPtr = DAG.CreateStackTemporary(RetVT);
+  Entry.Node = FIPtr;
+  Entry.Ty = RetTy->getPointerTo();
+  Entry.isSExt = isSigned;
+  Entry.isZExt = !isSigned;
+  Args.push_back(Entry);
+
+  SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+                                         TLI.getPointerTy());
+
+  // Splice the libcall in wherever FindInputOutputChains tells us to.
+  DebugLoc dl = Node->getDebugLoc();
+  std::pair<SDValue, SDValue> CallInfo =
+    TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+                    0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+                    /*isReturnValueUsed=*/true, Callee, Args, DAG, dl);
+
+  // Legalize the call sequence, starting with the chain.  This will advance
+  // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
+  // was added by LowerCallTo (guaranteeing proper serialization of calls).
+  LegalizeOp(CallInfo.second);
+
+  // Remainder is loaded back from the stack frame.
+  SDValue Rem = DAG.getLoad(RetVT, dl, getLastCALLSEQ(), FIPtr,
+                            MachinePointerInfo(), false, false, 0);
+  Results.push_back(CallInfo.first);
+  Results.push_back(Rem);
+}
+
 /// ExpandLegalINT_TO_FP - This function is responsible for legalizing a
 /// INT_TO_FP operation of the specified operand when the target requests that
 /// we expand it.  At this point, we know that the result and operand types are
@@ -2160,7 +2532,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
     if (!isSigned) {
       SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0);
 
-      SDValue ShiftConst = DAG.getConstant(1, TLI.getShiftAmountTy());
+      SDValue ShiftConst =
+          DAG.getConstant(1, TLI.getShiftAmountTy(Op0.getValueType()));
       SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst);
       SDValue AndConst = DAG.getConstant(1, MVT::i64);
       SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst);
@@ -2179,7 +2552,6 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
     }
 
     // Otherwise, implement the fully general conversion.
-    EVT SHVT = TLI.getShiftAmountTy();
 
     SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
          DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64));
@@ -2194,6 +2566,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
                    Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64),
                    ISD::SETUGE);
     SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0);
+    EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType());
 
     SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
                              DAG.getConstant(32, SHVT));
@@ -2244,7 +2617,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
                              false, false, Alignment);
   else {
     FudgeInReg =
-      LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl,
+      LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
                                 DAG.getEntryNode(), CPIdx,
                                 MachinePointerInfo::getConstantPool(),
                                 MVT::f32, false, false, Alignment));
@@ -2340,7 +2713,7 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
 ///
 SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
   EVT VT = Op.getValueType();
-  EVT SHVT = TLI.getShiftAmountTy();
+  EVT SHVT = TLI.getShiftAmountTy(VT);
   SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
   switch (VT.getSimpleVT().SimpleTy) {
   default: assert(0 && "Unhandled Expand type in BSWAP!");
@@ -2383,6 +2756,18 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
   }
 }
 
+/// SplatByte - Distribute ByteVal over NumBits bits.
+// FIXME: Move this helper to a common place.
+static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
+  APInt Val = APInt(NumBits, ByteVal);
+  unsigned Shift = 8;
+  for (unsigned i = NumBits; i > 8; i >>= 1) {
+    Val = (Val << Shift) | Val;
+    Shift <<= 1;
+  }
+  return Val;
+}
+
 /// ExpandBitCount - Expand the specified bitcount instruction into operations.
 ///
 SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
@@ -2390,26 +2775,45 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
   switch (Opc) {
   default: assert(0 && "Cannot expand this yet!");
   case ISD::CTPOP: {
-    static const uint64_t mask[6] = {
-      0x5555555555555555ULL, 0x3333333333333333ULL,
-      0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
-      0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
-    };
     EVT VT = Op.getValueType();
-    EVT ShVT = TLI.getShiftAmountTy();
-    unsigned len = VT.getSizeInBits();
-    for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
-      //x = (x & mask[i][len/8]) + (x >> (1 << i) & mask[i][len/8])
-      unsigned EltSize = VT.isVector() ?
-        VT.getVectorElementType().getSizeInBits() : len;
-      SDValue Tmp2 = DAG.getConstant(APInt(EltSize, mask[i]), VT);
-      SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
-      Op = DAG.getNode(ISD::ADD, dl, VT,
-                       DAG.getNode(ISD::AND, dl, VT, Op, Tmp2),
-                       DAG.getNode(ISD::AND, dl, VT,
-                                   DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3),
-                                   Tmp2));
-    }
+    EVT ShVT = TLI.getShiftAmountTy(VT);
+    unsigned Len = VT.getSizeInBits();
+
+    assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 &&
+           "CTPOP not implemented for this type.");
+
+    // This is the "best" algorithm from
+    // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+
+    SDValue Mask55 = DAG.getConstant(SplatByte(Len, 0x55), VT);
+    SDValue Mask33 = DAG.getConstant(SplatByte(Len, 0x33), VT);
+    SDValue Mask0F = DAG.getConstant(SplatByte(Len, 0x0F), VT);
+    SDValue Mask01 = DAG.getConstant(SplatByte(Len, 0x01), VT);
+
+    // v = v - ((v >> 1) & 0x55555555...)
+    Op = DAG.getNode(ISD::SUB, dl, VT, Op,
+                     DAG.getNode(ISD::AND, dl, VT,
+                                 DAG.getNode(ISD::SRL, dl, VT, Op,
+                                             DAG.getConstant(1, ShVT)),
+                                 Mask55));
+    // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
+    Op = DAG.getNode(ISD::ADD, dl, VT,
+                     DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
+                     DAG.getNode(ISD::AND, dl, VT,
+                                 DAG.getNode(ISD::SRL, dl, VT, Op,
+                                             DAG.getConstant(2, ShVT)),
+                                 Mask33));
+    // v = (v + (v >> 4)) & 0x0F0F0F0F...
+    Op = DAG.getNode(ISD::AND, dl, VT,
+                     DAG.getNode(ISD::ADD, dl, VT, Op,
+                                 DAG.getNode(ISD::SRL, dl, VT, Op,
+                                             DAG.getConstant(4, ShVT))),
+                     Mask0F);
+    // v = (v * 0x01010101...) >> (Len - 8)
+    Op = DAG.getNode(ISD::SRL, dl, VT,
+                     DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
+                     DAG.getConstant(Len - 8, ShVT));
+
     return Op;
   }
   case ISD::CTLZ: {
@@ -2423,7 +2827,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
     //
     // but see also: http://www.hackersdelight.org/HDcode/nlz.cc
     EVT VT = Op.getValueType();
-    EVT ShVT = TLI.getShiftAmountTy();
+    EVT ShVT = TLI.getShiftAmountTy(VT);
     unsigned len = VT.getSizeInBits();
     for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
       SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
@@ -2595,7 +2999,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
   // By default, atomic intrinsics are marked Legal and lowered. Targets
   // which don't support them directly, however, may want libcalls, in which
   // case they mark them Expand, and we get here.
-  // FIXME: Unimplemented for now. Add libcalls.
   case ISD::ATOMIC_SWAP:
   case ISD::ATOMIC_LOAD_ADD:
   case ISD::ATOMIC_LOAD_SUB:
@@ -2660,7 +3063,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     // SAR.  However, it is doubtful that any exist.
     EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
     EVT VT = Node->getValueType(0);
-    EVT ShiftAmountTy = TLI.getShiftAmountTy();
+    EVT ShiftAmountTy = TLI.getShiftAmountTy(VT);
     if (VT.isVector())
       ShiftAmountTy = VT;
     unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
@@ -2674,7 +3077,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
   }
   case ISD::FP_ROUND_INREG: {
     // The only way we can lower this is to turn it into a TRUNCSTORE,
-    // EXTLOAD pair, targetting a temporary location (a stack slot).
+    // EXTLOAD pair, targeting a temporary location (a stack slot).
 
     // NOTE: there is a choice here between constantly creating new stack
     // slots and always reusing the same one.  We currently always create
@@ -2774,6 +3177,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
   case ISD::EXTRACT_SUBVECTOR:
     Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));
     break;
+  case ISD::INSERT_SUBVECTOR:
+    Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0)));
+    break;
   case ISD::CONCAT_VECTORS: {
     Results.push_back(ExpandVectorBuildThroughStack(Node));
     break;
@@ -2821,7 +3227,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
       // 1 -> Hi
       Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0),
                          DAG.getConstant(OpTy.getSizeInBits()/2,
-                                         TLI.getShiftAmountTy()));
+                    TLI.getShiftAmountTy(Node->getOperand(0).getValueType())));
       Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1);
     } else {
       // 0 -> Lo
@@ -2997,24 +3403,25 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
     Tmp2 = Node->getOperand(0);
     Tmp3 = Node->getOperand(1);
-    if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+    if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
+        (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+         UseDivRem(Node, isSigned, false))) {
       Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
     } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
       // X % Y -> X-X/Y*Y
       Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
       Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
       Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
-    } else if (isSigned) {
+    } else if (isSigned)
       Tmp1 = ExpandIntLibCall(Node, true,
                               RTLIB::SREM_I8,
                               RTLIB::SREM_I16, RTLIB::SREM_I32,
                               RTLIB::SREM_I64, RTLIB::SREM_I128);
-    } else {
+    else
       Tmp1 = ExpandIntLibCall(Node, false,
                               RTLIB::UREM_I8,
                               RTLIB::UREM_I16, RTLIB::UREM_I32,
                               RTLIB::UREM_I64, RTLIB::UREM_I128);
-    }
     Results.push_back(Tmp1);
     break;
   }
@@ -3024,7 +3431,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
     EVT VT = Node->getValueType(0);
     SDVTList VTs = DAG.getVTList(VT, VT);
-    if (TLI.isOperationLegalOrCustom(DivRemOpc, VT))
+    if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
+        (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+         UseDivRem(Node, isSigned, true)))
       Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
                          Node->getOperand(1));
     else if (isSigned)
@@ -3053,6 +3462,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     Results.push_back(Tmp1.getValue(1));
     break;
   }
+  case ISD::SDIVREM:
+  case ISD::UDIVREM:
+    // Expand into divrem libcall
+    ExpandDivRemLibCall(Node, Results);
+    break;
   case ISD::MUL: {
     EVT VT = Node->getValueType(0);
     SDVTList VTs = DAG.getVTList(VT, VT);
@@ -3137,6 +3551,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
   case ISD::UMULO:
   case ISD::SMULO: {
     EVT VT = Node->getValueType(0);
+    EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
     SDValue LHS = Node->getOperand(0);
     SDValue RHS = Node->getOperand(1);
     SDValue BottomHalf;
@@ -3152,15 +3567,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
       BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                                RHS);
       TopHalf = BottomHalf.getValue(1);
-    } else {
-      // FIXME: We should be able to fall back to a libcall with an illegal
-      // type in some cases.
-      // Also, we can fall back to a division in some cases, but that's a big
-      // performance hit in the general case.
-      assert(TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
-                                               VT.getSizeInBits() * 2)) &&
-             "Don't know how to expand this operation yet!");
-      EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
+    } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
+                                                 VT.getSizeInBits() * 2))) {
       LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
       RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
       Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
@@ -3168,9 +3576,45 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
                                DAG.getIntPtrConstant(0));
       TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
                             DAG.getIntPtrConstant(1));
+    } else {
+      // We can fall back to a libcall with an illegal type for the MUL if we
+      // have a libcall big enough.
+      // Also, we can fall back to a division in some cases, but that's a big
+      // performance hit in the general case.
+      RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+      if (WideVT == MVT::i16)
+        LC = RTLIB::MUL_I16;
+      else if (WideVT == MVT::i32)
+        LC = RTLIB::MUL_I32;
+      else if (WideVT == MVT::i64)
+        LC = RTLIB::MUL_I64;
+      else if (WideVT == MVT::i128)
+        LC = RTLIB::MUL_I128;
+      assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
+
+      // The high part is obtained by SRA'ing all but one of the bits of low
+      // part.
+      unsigned LoSize = VT.getSizeInBits();
+      SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, RHS,
+                                DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+      SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, LHS,
+                                DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+
+      // Here we're passing the 2 arguments explicitly as 4 arguments that are
+      // pre-lowered to the correct types. This all depends upon WideVT not
+      // being a legal type for the architecture and thus has to be split to
+      // two arguments.
+      SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
+      SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
+      BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
+                               DAG.getIntPtrConstant(0));
+      TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
+                            DAG.getIntPtrConstant(1));
     }
+
     if (isSigned) {
-      Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy());
+      Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1,
+                             TLI.getShiftAmountTy(BottomHalf.getValueType()));
       Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1);
       TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, Tmp1,
                              ISD::SETNE);
@@ -3188,7 +3632,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1));
     Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2,
                        DAG.getConstant(PairTy.getSizeInBits()/2,
-                                       TLI.getShiftAmountTy()));
+                                       TLI.getShiftAmountTy(PairTy)));
     Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2));
     break;
   }
@@ -3223,7 +3667,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
 
     EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
-    SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, PTy, dl, Chain, Addr,
+    SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
                                 MachinePointerInfo::getJumpTable(), MemVT,
                                 false, false, 0);
     Addr = LD;
@@ -3249,9 +3693,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
                          Tmp2.getOperand(0), Tmp2.getOperand(1),
                          Node->getOperand(2));
     } else {
+      // We test only the i1 bit.  Skip the AND if UNDEF.
+      Tmp3 = (Tmp2.getOpcode() == ISD::UNDEF) ? Tmp2 :
+        DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2,
+                    DAG.getConstant(1, Tmp2.getValueType()));
       Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1,
-                         DAG.getCondCode(ISD::SETNE), Tmp2,
-                         DAG.getConstant(0, Tmp2.getValueType()),
+                         DAG.getCondCode(ISD::SETNE), Tmp3,
+                         DAG.getConstant(0, Tmp3.getValueType()),
                          Node->getOperand(2));
     }
     Results.push_back(Tmp1);
@@ -3302,7 +3750,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
 
     LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()),
                           Tmp2, Tmp3, Tmp4, dl);
-    LastCALLSEQ_END = DAG.getEntryNode();
+    assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?");
+    setLastCALLSEQ(DAG.getEntryNode());
 
     assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
     Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
@@ -3366,7 +3815,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
     Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
     Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1);
     Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1,
-                          DAG.getConstant(DiffBits, TLI.getShiftAmountTy()));
+                          DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT)));
     Results.push_back(Tmp1);
     break;
   }
@@ -3460,9 +3909,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
 
 // SelectionDAG::Legalize - This is the entry point for the file.
 //
-void SelectionDAG::Legalize(CodeGenOpt::Level OptLevel) {
+void SelectionDAG::Legalize() {
   /// run - This is the main entry point to this class.
   ///
-  SelectionDAGLegalize(*this, OptLevel).LegalizeDAG();
+  SelectionDAGLegalize(*this).LegalizeDAG();
 }
-