Supporting inline memmove isn't going to be worthwhile. The only way to avoid

[oota-llvm.git] / lib / Target / ARM / ARMISelLowering.cpp
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp

index cf1d591428abb90eb63d1426ace9d8128a06d4dc..b55ef700f5c237a834866d5a02e3eef274fec048 100644 (file)
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -38,6 +38,7 @@
  #include "llvm/CodeGen/MachineFrameInfo.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/CodeGen/PseudoSourceValue.h"
  #include "llvm/CodeGen/SelectionDAG.h"
@@ -107,8 +108,14 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
    EVT ElemTy = VT.getVectorElementType();
    if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
      setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom);
+  setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
-  if (ElemTy != MVT::i32) {
+  if (ElemTy == MVT::i32) {
+    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Custom);
+    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Custom);
+    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
+    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
+  } else {
      setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
      setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
      setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
@@ -120,18 +127,12 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
    setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, VT.getSimpleVT(), Expand);
    if (VT.isInteger()) {
      setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
      setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
      setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
-    setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
-    setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
-    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
-         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
-      setTruncStoreAction(VT.getSimpleVT(),
-                          (MVT::SimpleValueType)InnerVT, Expand);
    }
-  setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
  
    // Promote all bit-wise operations.
    if (VT.isInteger() && VT != PromotedBitwiseVT) {
@@ -421,6 +422,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
      setLibcallName(RTLIB::MEMSET,  "__aeabi_memset");
    }
  
+  // Use divmod compiler-rt calls for iOS 5.0 and later.
+  if (Subtarget->getTargetTriple().getOS() == Triple::IOS &&
+      !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
+    setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
+    setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
+  }
+
    if (Subtarget->isThumb1Only())
      addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
    else
@@ -433,6 +441,17 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
      setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    }
  
+  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
+    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+      setTruncStoreAction((MVT::SimpleValueType)VT,
+                          (MVT::SimpleValueType)InnerVT, Expand);
+    setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+    setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+    setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+  }
+
    if (Subtarget->hasNEON()) {
      addDRTypeForNEON(MVT::v2f32);
      addDRTypeForNEON(MVT::v8i8);
@@ -474,8 +493,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
      setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
  
-    setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
-
      // Neon does not support some operations on v1i64 and v2i64 types.
      setOperationAction(ISD::MUL, MVT::v1i64, Expand);
      // Custom handling for some quad-vector types to detect VMULL.
@@ -511,6 +528,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
      setTargetDAGCombine(ISD::FP_TO_SINT);
      setTargetDAGCombine(ISD::FP_TO_UINT);
      setTargetDAGCombine(ISD::FDIV);
+
+    setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand);
    }
  
    computeRegisterProperties();
@@ -741,6 +760,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
  
    //// temporary - rewrite interface to use type
    maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
+  maxStoresPerMemset = 16;
+  maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
  
    // On ARM arguments smaller than 4 bytes are extended, so all arguments
    // are at least 4 bytes aligned.
@@ -976,7 +997,7 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
      if (VT == MVT::Glue || VT == MVT::Other)
        continue;
      if (VT.isFloatingPoint() || VT.isVector())
-      return Sched::Latency;
+      return Sched::ILP;
    }
  
    if (!N->isMachineOpcode())
@@ -991,7 +1012,7 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
      return Sched::RegPressure;
    if (!Itins->isEmpty() &&
        Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
-    return Sched::Latency;
+    return Sched::ILP;
  
    return Sched::RegPressure;
  }
@@ -1214,8 +1235,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    MachineFunction &MF = DAG.getMachineFunction();
    bool IsStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
    bool IsSibCall = false;
-  // Temporarily disable tail calls so things don't break.
-  if (!EnableARMTailCalls)
+  // Disable tail calls if they're not supported.
+  if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
      isTailCall = false;
    if (isTailCall) {
      // Check if it's really possible to do a tail call.
@@ -1324,7 +1345,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
            SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
            SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
                                       MachinePointerInfo(),
-                                     false, false, 0);
+                                     false, false, false, 0);
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(j, Load));
          }
@@ -1340,12 +1361,10 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
        SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
        SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
                                           MVT::i32);
-      // TODO: Disable AlwaysInline when it becomes possible
-      //       to emit a nested call sequence.
        MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
                                            Flags.getByValAlign(),
                                            /*isVolatile=*/false,
-                                          /*AlwaysInline=*/true,
+                                          /*AlwaysInline=*/false,
                                            MachinePointerInfo(0),
                                            MachinePointerInfo(0)));
  
@@ -1419,21 +1438,22 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
        Callee = DAG.getLoad(getPointerTy(), dl,
                             DAG.getEntryNode(), CPAddr,
                             MachinePointerInfo::getConstantPool(),
-                           false, false, 0);
+                           false, false, false, 0);
      } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
        const char *Sym = S->getSymbol();
  
        // Create a constant pool entry for the callee address
        unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
-      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
-                                                       Sym, ARMPCLabelIndex, 0);
+      ARMConstantPoolValue *CPV =
+        ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
+                                      ARMPCLabelIndex, 0);
        // Get the address of the callee into a register
        SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
        CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
        Callee = DAG.getLoad(getPointerTy(), dl,
                             DAG.getEntryNode(), CPAddr,
                             MachinePointerInfo::getConstantPool(),
-                           false, false, 0);
+                           false, false, false, 0);
      }
    } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
      const GlobalValue *GV = G->getGlobal();
@@ -1454,7 +1474,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
        Callee = DAG.getLoad(getPointerTy(), dl,
                             DAG.getEntryNode(), CPAddr,
                             MachinePointerInfo::getConstantPool(),
-                           false, false, 0);
+                           false, false, false, 0);
        SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
        Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                             getPointerTy(), Callee, PICLabel);
@@ -1475,14 +1495,15 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
      const char *Sym = S->getSymbol();
      if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
        unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
-      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
-                                                       Sym, ARMPCLabelIndex, 4);
+      ARMConstantPoolValue *CPV =
+        ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
+                                      ARMPCLabelIndex, 4);
        SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
        CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
        Callee = DAG.getLoad(getPointerTy(), dl,
                             DAG.getEntryNode(), CPAddr,
                             MachinePointerInfo::getConstantPool(),
-                           false, false, 0);
+                           false, false, false, 0);
        SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
        Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                             getPointerTy(), Callee, PICLabel);
@@ -1953,7 +1974,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
    CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
    SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
                                 MachinePointerInfo::getConstantPool(),
-                               false, false, 0);
+                               false, false, false, 0);
    if (RelocM == Reloc::Static)
      return Result;
    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -1977,7 +1998,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
    Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
    Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
                           MachinePointerInfo::getConstantPool(),
-                         false, false, 0);
+                         false, false, false, 0);
    SDValue Chain = Argument.getValue(1);
  
    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -2025,7 +2046,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
      Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
      Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                           MachinePointerInfo::getConstantPool(),
-                         false, false, 0);
+                         false, false, false, 0);
      Chain = Offset.getValue(1);
  
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -2033,7 +2054,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
  
      Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                           MachinePointerInfo::getConstantPool(),
-                         false, false, 0);
+                         false, false, false, 0);
    } else {
      // local exec model
      ARMConstantPoolValue *CPV =
@@ -2042,7 +2063,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
      Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
      Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                           MachinePointerInfo::getConstantPool(),
-                         false, false, 0);
+                         false, false, false, 0);
    }
  
    // The address of the thread local variable is the add of the thread
@@ -2080,13 +2101,14 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
      SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                                   CPAddr,
                                   MachinePointerInfo::getConstantPool(),
-                                 false, false, 0);
+                                 false, false, false, 0);
      SDValue Chain = Result.getValue(1);
      SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
      Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
      if (!UseGOTOFF)
        Result = DAG.getLoad(PtrVT, dl, Chain, Result,
-                           MachinePointerInfo::getGOT(), false, false, 0);
+                           MachinePointerInfo::getGOT(),
+                           false, false, false, 0);
      return Result;
    }
  
@@ -2103,7 +2125,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                         MachinePointerInfo::getConstantPool(),
-                       false, false, 0);
+                       false, false, false, 0);
    }
  }
  
@@ -2131,7 +2153,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
                                   DAG.getTargetGlobalAddress(GV, dl, PtrVT));
      if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
        Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
-                           MachinePointerInfo::getGOT(), false, false, 0);
+                           MachinePointerInfo::getGOT(),
+                           false, false, false, 0);
      return Result;
    }
  
@@ -2151,7 +2174,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
  
    SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                                 MachinePointerInfo::getConstantPool(),
-                               false, false, 0);
+                               false, false, false, 0);
    SDValue Chain = Result.getValue(1);
  
    if (RelocM == Reloc::PIC_) {
@@ -2161,7 +2184,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
  
    if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
      Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
-                         false, false, 0);
+                         false, false, false, 0);
  
    return Result;
  }
@@ -2176,14 +2199,14 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
    EVT PtrVT = getPointerTy();
    DebugLoc dl = Op.getDebugLoc();
    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
-  ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
-                                                       "_GLOBAL_OFFSET_TABLE_",
-                                                       ARMPCLabelIndex, PCAdj);
+  ARMConstantPoolValue *CPV =
+    ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
+                                  ARMPCLabelIndex, PCAdj);
    SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                                 MachinePointerInfo::getConstantPool(),
-                               false, false, 0);
+                               false, false, false, 0);
    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
    return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
  }
@@ -2200,7 +2223,8 @@ SDValue
  ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
    DebugLoc dl = Op.getDebugLoc();
    SDValue Val = DAG.getConstant(0, MVT::i32);
-  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0),
+  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
+                     DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
                       Op.getOperand(1), Val);
  }
  
@@ -2240,7 +2264,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
      SDValue Result =
        DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                    MachinePointerInfo::getConstantPool(),
-                  false, false, 0);
+                  false, false, false, 0);
  
      if (RelocM == Reloc::PIC_) {
        SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -2372,7 +2396,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
                              MachinePointerInfo::getFixedStack(FI),
-                            false, false, 0);
+                            false, false, false, 0);
    } else {
      Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
      ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
@@ -2508,7 +2532,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
              SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
              ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
                                      MachinePointerInfo::getFixedStack(FI),
-                                    false, false, 0);
+                                    false, false, false, 0);
            } else {
              ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
                                               Chain, DAG, dl);
@@ -2599,7 +2623,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
              SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
              InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                                           MachinePointerInfo::getFixedStack(FI),
-                                         false, false, 0));
+                                         false, false, false, 0));
            }
            lastInsIndex = index;
          }
@@ -2834,7 +2858,7 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
      return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
                         Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
                         Ld->isVolatile(), Ld->isNonTemporal(),
-                       Ld->getAlignment());
+                       Ld->isInvariant(), Ld->getAlignment());
  
    llvm_unreachable("Unknown VFP cmp argument!");
  }
@@ -2853,7 +2877,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
                            Ld->getChain(), Ptr,
                            Ld->getPointerInfo(),
                            Ld->isVolatile(), Ld->isNonTemporal(),
-                          Ld->getAlignment());
+                          Ld->isInvariant(), Ld->getAlignment());
  
      EVT PtrType = Ptr.getValueType();
      unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
@@ -2863,7 +2887,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
                            Ld->getChain(), NewPtr,
                            Ld->getPointerInfo().getWithOffset(4),
                            Ld->isVolatile(), Ld->isNonTemporal(),
-                          NewAlign);
+                          Ld->isInvariant(), NewAlign);
      return;
    }
  
@@ -2987,19 +3011,33 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
    if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
      Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
                         MachinePointerInfo::getJumpTable(),
-                       false, false, 0);
+                       false, false, false, 0);
      Chain = Addr.getValue(1);
      Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
      return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
    } else {
      Addr = DAG.getLoad(PTy, dl, Chain, Addr,
-                       MachinePointerInfo::getJumpTable(), false, false, 0);
+                       MachinePointerInfo::getJumpTable(),
+                       false, false, false, 0);
      Chain = Addr.getValue(1);
      return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
    }
  }
  
+static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+  EVT VT = Op.getValueType();
+  assert(VT.getVectorElementType() == MVT::i32 && "Unexpected custom lowering");
+
+  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
+    return Op;
+  return DAG.UnrollVectorOp(Op.getNode());
+}
+
  static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+  EVT VT = Op.getValueType();
+  if (VT.isVector())
+    return LowerVectorFP_TO_INT(Op, DAG);
+
    DebugLoc dl = Op.getDebugLoc();
    unsigned Opc;
  
@@ -3021,6 +3059,12 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
    EVT VT = Op.getValueType();
    DebugLoc dl = Op.getDebugLoc();
  
+  if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
+    if (VT.getVectorElementType() == MVT::f32)
+      return Op;
+    return DAG.UnrollVectorOp(Op.getNode());
+  }
+
    assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
           "Invalid type for custom lowering!");
    if (VT != MVT::v4f32)
@@ -3163,7 +3207,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
      SDValue Offset = DAG.getConstant(4, MVT::i32);
      return DAG.getLoad(VT, dl, DAG.getEntryNode(),
                         DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
-                       MachinePointerInfo(), false, false, 0);
+                       MachinePointerInfo(), false, false, false, 0);
    }
  
    // Return LR, which contains the return address. Mark it an implicit live-in.
@@ -3184,7 +3228,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
    while (Depth--)
      FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
                              MachinePointerInfo(),
-                            false, false, 0);
+                            false, false, false, 0);
    return FrameAddr;
  }
  
@@ -3933,8 +3977,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
        }
  
        // Try an immediate VMVN.
-      uint64_t NegatedImm = (SplatBits.getZExtValue() ^
-                             ((1LL << SplatBitSize) - 1));
+      uint64_t NegatedImm = (~SplatBits).getZExtValue();
        Val = isNEONModifiedImm(NegatedImm,
                                        SplatUndef.getZExtValue(), SplatBitSize,
                                        DAG, VmovVT, VT.is128BitVector(),
@@ -4047,6 +4090,14 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
        // A shuffle can only come from building a vector from various
        // elements of other vectors.
        return SDValue();
+    } else if (V.getOperand(0).getValueType().getVectorElementType() !=
+               VT.getVectorElementType()) {
+      // This code doesn't know how to handle shuffles where the vector
+      // element types do not match (this happens because type legalization
+      // promotes the return type of EXTRACT_VECTOR_ELT).
+      // FIXME: It might be appropriate to extend this code to handle
+      // mismatched types.
+      return SDValue();
      }
  
      // Record this extraction against the appropriate vector if possible...
@@ -4327,9 +4378,24 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
        // If this is undef splat, generate it via "just" vdup, if possible.
        if (Lane == -1) Lane = 0;
  
+      // Test if V1 is a SCALAR_TO_VECTOR.
        if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
          return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
        }
+      // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
+      // (and probably will turn into a SCALAR_TO_VECTOR once legalization
+      // reaches it).
+      if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
+          !isa<ConstantSDNode>(V1.getOperand(0))) {
+        bool IsScalarToVector = true;
+        for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
+          if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
+            IsScalarToVector = false;
+            break;
+          }
+        if (IsScalarToVector)
+          return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
+      }
        return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
                           DAG.getConstant(Lane, MVT::i32));
      }
@@ -4430,6 +4496,15 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
    return SDValue();
  }
  
+static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+  // INSERT_VECTOR_ELT is legal only for immediate indexes.
+  SDValue Lane = Op.getOperand(2);
+  if (!isa<ConstantSDNode>(Lane))
+    return SDValue();
+
+  return Op;
+}
+
  static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
    // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
    SDValue Lane = Op.getOperand(1);
@@ -4506,11 +4581,10 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
        unsigned EltSize = VT.getVectorElementType().getSizeInBits();
        unsigned HalfSize = EltSize / 2;
        if (isSigned) {
-        int64_t SExtVal = C->getSExtValue();
-        if ((SExtVal >> HalfSize) != (SExtVal >> EltSize))
+        if (!isIntN(HalfSize, C->getSExtValue()))
            return false;
        } else {
-        if ((C->getZExtValue() >> HalfSize) != 0)
+        if (!isUIntN(HalfSize, C->getZExtValue()))
            return false;
        }
        continue;
@@ -4549,7 +4623,8 @@ static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
    if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
      return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
                         LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
-                       LD->isNonTemporal(), LD->getAlignment());
+                       LD->isNonTemporal(), LD->isInvariant(),
+                       LD->getAlignment());
    // Otherwise, the value must be a BUILD_VECTOR.  For v2i64, it will
    // have been legalized as a BITCAST from v4i32.
    if (N->getOpcode() == ISD::BITCAST) {
@@ -4882,9 +4957,9 @@ static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
  static void
  ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
                      SelectionDAG &DAG, unsigned NewOp) {
-  EVT T = Node->getValueType(0);
    DebugLoc dl = Node->getDebugLoc();
-  assert (T == MVT::i64 && "Only know how to expand i64 atomics");
+  assert (Node->getValueType(0) == MVT::i64 &&
+          "Only know how to expand i64 atomics");
  
    SmallVector<SDValue, 6> Ops;
    Ops.push_back(Node->getOperand(0)); // Chain
@@ -4953,6 +5028,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
    case ISD::SETCC:         return LowerVSETCC(Op, DAG);
    case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG, Subtarget);
    case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
    case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
    case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
    case ISD::FLT_ROUNDS_:   return LowerFLT_ROUNDS_(Op, DAG);
@@ -5480,6 +5556,551 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
    return BB;
  }
  
+/// EmitBasePointerRecalculation - For functions using a base pointer, we
+/// rematerialize it (via the frame pointer).
+void ARMTargetLowering::
+EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB,
+                             MachineBasicBlock *DispatchBB) const {
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
+  MachineFunction &MF = *MI->getParent()->getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
+
+  if (!RI.hasBasePointer(MF)) return;
+
+  MachineBasicBlock::iterator MBBI = MI;
+
+  int32_t NumBytes = AFI->getFramePtrSpillOffset();
+  unsigned FramePtr = RI.getFrameRegister(MF);
+  assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
+         "Base pointer without frame pointer?");
+
+  if (AFI->isThumb2Function())
+    llvm::emitT2RegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
+                                 FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
+  else if (AFI->isThumbFunction())
+    llvm::emitThumbRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
+                                    FramePtr, -NumBytes, *AII, RI);
+  else
+    llvm::emitARMRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
+                                  FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
+
+  if (!RI.needsStackRealignment(MF)) return;
+
+  // If there's dynamic realignment, adjust for it.
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  unsigned MaxAlign = MFI->getMaxAlignment();
+  assert(!AFI->isThumb1OnlyFunction());
+
+  // Emit bic r6, r6, MaxAlign
+  unsigned bicOpc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri;
+  AddDefaultCC(
+    AddDefaultPred(
+      BuildMI(*MBB, MBBI, MI->getDebugLoc(), TII->get(bicOpc), ARM::R6)
+      .addReg(ARM::R6, RegState::Kill)
+      .addImm(MaxAlign - 1)));
+}
+
+/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
+/// registers the function context.
+void ARMTargetLowering::
+SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
+                       MachineBasicBlock *DispatchBB, int FI) const {
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  DebugLoc dl = MI->getDebugLoc();
+  MachineFunction *MF = MBB->getParent();
+  MachineRegisterInfo *MRI = &MF->getRegInfo();
+  MachineConstantPool *MCP = MF->getConstantPool();
+  ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
+  const Function *F = MF->getFunction();
+
+  bool isThumb = Subtarget->isThumb();
+  bool isThumb2 = Subtarget->isThumb2();
+
+  unsigned PCLabelId = AFI->createPICLabelUId();
+  unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
+  ARMConstantPoolValue *CPV =
+    ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
+  unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
+
+  const TargetRegisterClass *TRC =
+    isThumb ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+
+  // Grab constant pool and fixed stack memory operands.
+  MachineMemOperand *CPMMO =
+    MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(),
+                             MachineMemOperand::MOLoad, 4, 4);
+
+  MachineMemOperand *FIMMOSt =
+    MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+                             MachineMemOperand::MOStore, 4, 4);
+
+  EmitBasePointerRecalculation(MI, MBB, DispatchBB);
+
+  // Load the address of the dispatch MBB into the jump buffer.
+  if (isThumb2) {
+    // Incoming value: jbuf
+    //   ldr.n  r5, LCPI1_1
+    //   orr    r5, r5, #1
+    //   add    r5, pc
+    //   str    r5, [$jbuf, #+4] ; &jbuf[1]
+    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
+                   .addConstantPoolIndex(CPI)
+                   .addMemOperand(CPMMO));
+    // Set the low bit because of thumb mode.
+    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+    AddDefaultCC(
+      AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
+                     .addReg(NewVReg1, RegState::Kill)
+                     .addImm(0x01)));
+    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+    BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
+      .addReg(NewVReg2, RegState::Kill)
+      .addImm(PCLabelId);
+    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
+                   .addReg(NewVReg3, RegState::Kill)
+                   .addFrameIndex(FI)
+                   .addImm(36)  // &jbuf[1] :: pc
+                   .addMemOperand(FIMMOSt));
+  } else if (isThumb) {
+    // Incoming value: jbuf
+    //   ldr.n  r1, LCPI1_4
+    //   add    r1, pc
+    //   mov    r2, #1
+    //   orrs   r1, r2
+    //   add    r2, $jbuf, #+4 ; &jbuf[1]
+    //   str    r1, [r2]
+    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
+                   .addConstantPoolIndex(CPI)
+                   .addMemOperand(CPMMO));
+    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+    BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
+      .addReg(NewVReg1, RegState::Kill)
+      .addImm(PCLabelId);
+    // Set the low bit because of thumb mode.
+    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
+                   .addReg(ARM::CPSR, RegState::Define)
+                   .addImm(1));
+    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
+                   .addReg(ARM::CPSR, RegState::Define)
+                   .addReg(NewVReg2, RegState::Kill)
+                   .addReg(NewVReg3, RegState::Kill));
+    unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5)
+                   .addFrameIndex(FI)
+                   .addImm(36)); // &jbuf[1] :: pc
+    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
+                   .addReg(NewVReg4, RegState::Kill)
+                   .addReg(NewVReg5, RegState::Kill)
+                   .addImm(0)
+                   .addMemOperand(FIMMOSt));
+  } else {
+    // Incoming value: jbuf
+    //   ldr  r1, LCPI1_1
+    //   add  r1, pc, r1
+    //   str  r1, [$jbuf, #+4] ; &jbuf[1]
+    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12),  NewVReg1)
+                   .addConstantPoolIndex(CPI)
+                   .addImm(0)
+                   .addMemOperand(CPMMO));
+    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
+                   .addReg(NewVReg1, RegState::Kill)
+                   .addImm(PCLabelId));
+    AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
+                   .addReg(NewVReg2, RegState::Kill)
+                   .addFrameIndex(FI)
+                   .addImm(36)  // &jbuf[1] :: pc
+                   .addMemOperand(FIMMOSt));
+  }
+}
+
+MachineBasicBlock *ARMTargetLowering::
+EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  DebugLoc dl = MI->getDebugLoc();
+  MachineFunction *MF = MBB->getParent();
+  MachineRegisterInfo *MRI = &MF->getRegInfo();
+  ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
+  MachineFrameInfo *MFI = MF->getFrameInfo();
+  int FI = MFI->getFunctionContextIndex();
+
+  const TargetRegisterClass *TRC =
+    Subtarget->isThumb() ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
+
+  // Get a mapping of the call site numbers to all of the landing pads they're
+  // associated with.
+  DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
+  unsigned MaxCSNum = 0;
+  MachineModuleInfo &MMI = MF->getMMI();
+  for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; ++BB) {
+    if (!BB->isLandingPad()) continue;
+
+    // FIXME: We should assert that the EH_LABEL is the first MI in the landing
+    // pad.
+    for (MachineBasicBlock::iterator
+           II = BB->begin(), IE = BB->end(); II != IE; ++II) {
+      if (!II->isEHLabel()) continue;
+
+      MCSymbol *Sym = II->getOperand(0).getMCSymbol();
+      if (!MMI.hasCallSiteLandingPad(Sym)) continue;
+
+      SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym);
+      for (SmallVectorImpl<unsigned>::iterator
+             CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
+           CSI != CSE; ++CSI) {
+        CallSiteNumToLPad[*CSI].push_back(BB);
+        MaxCSNum = std::max(MaxCSNum, *CSI);
+      }
+      break;
+    }
+  }
+
+  // Get an ordered list of the machine basic blocks for the jump table.
+  std::vector<MachineBasicBlock*> LPadList;
+  SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;
+  LPadList.reserve(CallSiteNumToLPad.size());
+  for (unsigned I = 1; I <= MaxCSNum; ++I) {
+    SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
+    for (SmallVectorImpl<MachineBasicBlock*>::iterator
+           II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
+      LPadList.push_back(*II);
+      InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
+    }
+  }
+
+  assert(!LPadList.empty() &&
+         "No landing pad destinations for the dispatch jump table!");
+
+  // Create the jump table and associated information.
+  MachineJumpTableInfo *JTI =
+    MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
+  unsigned MJTI = JTI->createJumpTableIndex(LPadList);
+  unsigned UId = AFI->createJumpTableUId();
+
+  // Create the MBBs for the dispatch code.
+
+  // Shove the dispatch's address into the return slot in the function context.
+  MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
+  DispatchBB->setIsLandingPad();
+
+  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
+  BuildMI(TrapBB, dl, TII->get(Subtarget->isThumb() ? ARM::tTRAP : ARM::TRAP));
+  DispatchBB->addSuccessor(TrapBB);
+
+  MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
+  DispatchBB->addSuccessor(DispContBB);
+
+  // Insert and MBBs.
+  MF->insert(MF->end(), DispatchBB);
+  MF->insert(MF->end(), DispContBB);
+  MF->insert(MF->end(), TrapBB);
+
+  // Insert code into the entry block that creates and registers the function
+  // context.
+  SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
+
+  MachineMemOperand *FIMMOLd =
+    MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
+                             MachineMemOperand::MOLoad |
+                             MachineMemOperand::MOVolatile, 4, 4);
+
+  unsigned NumLPads = LPadList.size();
+  if (Subtarget->isThumb2()) {
+    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
+                   .addFrameIndex(FI)
+                   .addImm(4)
+                   .addMemOperand(FIMMOLd));
+
+    if (NumLPads < 256) {
+      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
+                     .addReg(NewVReg1)
+                     .addImm(LPadList.size()));
+    } else {
+      unsigned VReg1 = MRI->createVirtualRegister(TRC);
+      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
+                     .addImm(NumLPads & 0xFFFF));
+
+      unsigned VReg2 = VReg1;
+      if ((NumLPads & 0xFFFF0000) != 0) {
+        VReg2 = MRI->createVirtualRegister(TRC);
+        AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
+                       .addReg(VReg1)
+                       .addImm(NumLPads >> 16));
+      }
+
+      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
+                     .addReg(NewVReg1)
+                     .addReg(VReg2));
+    }
+
+    BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
+      .addMBB(TrapBB)
+      .addImm(ARMCC::HI)
+      .addReg(ARM::CPSR);
+
+    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3)
+                   .addJumpTableIndex(MJTI)
+                   .addImm(UId));
+
+    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+    AddDefaultCC(
+      AddDefaultPred(
+        BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
+        .addReg(NewVReg3, RegState::Kill)
+        .addReg(NewVReg1)
+        .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
+
+    BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
+      .addReg(NewVReg4, RegState::Kill)
+      .addReg(NewVReg1)
+      .addJumpTableIndex(MJTI)
+      .addImm(UId);
+  } else if (Subtarget->isThumb()) {
+    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
+                   .addFrameIndex(FI)
+                   .addImm(1)
+                   .addMemOperand(FIMMOLd));
+
+    if (NumLPads < 256) {
+      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
+                     .addReg(NewVReg1)
+                     .addImm(NumLPads));
+    } else {
+      MachineConstantPool *ConstantPool = MF->getConstantPool();
+      Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
+      const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
+
+      // MachineConstantPool wants an explicit alignment.
+      unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
+      if (Align == 0)
+        Align = getTargetData()->getTypeAllocSize(C->getType());
+      unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
+
+      unsigned VReg1 = MRI->createVirtualRegister(TRC);
+      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
+                     .addReg(VReg1, RegState::Define)
+                     .addConstantPoolIndex(Idx));
+      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
+                     .addReg(NewVReg1)
+                     .addReg(VReg1));
+    }
+
+    BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
+      .addMBB(TrapBB)
+      .addImm(ARMCC::HI)
+      .addReg(ARM::CPSR);
+
+    unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
+                   .addReg(ARM::CPSR, RegState::Define)
+                   .addReg(NewVReg1)
+                   .addImm(2));
+
+    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
+                   .addJumpTableIndex(MJTI)
+                   .addImm(UId));
+
+    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
+                   .addReg(ARM::CPSR, RegState::Define)
+                   .addReg(NewVReg2, RegState::Kill)
+                   .addReg(NewVReg3));
+
+    MachineMemOperand *JTMMOLd =
+      MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
+                               MachineMemOperand::MOLoad, 4, 4);
+
+    unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
+                   .addReg(NewVReg4, RegState::Kill)
+                   .addImm(0)
+                   .addMemOperand(JTMMOLd));
+
+    unsigned NewVReg6 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
+                   .addReg(ARM::CPSR, RegState::Define)
+                   .addReg(NewVReg5, RegState::Kill)
+                   .addReg(NewVReg3));
+
+    BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
+      .addReg(NewVReg6, RegState::Kill)
+      .addJumpTableIndex(MJTI)
+      .addImm(UId);
+  } else {
+    unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
+                   .addFrameIndex(FI)
+                   .addImm(4)
+                   .addMemOperand(FIMMOLd));
+
+    if (NumLPads < 256) {
+      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
+                     .addReg(NewVReg1)
+                     .addImm(NumLPads));
+    } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
+      unsigned VReg1 = MRI->createVirtualRegister(TRC);
+      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
+                     .addImm(NumLPads & 0xFFFF));
+
+      unsigned VReg2 = VReg1;
+      if ((NumLPads & 0xFFFF0000) != 0) {
+        VReg2 = MRI->createVirtualRegister(TRC);
+        AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
+                       .addReg(VReg1)
+                       .addImm(NumLPads >> 16));
+      }
+
+      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
+                     .addReg(NewVReg1)
+                     .addReg(VReg2));
+    } else {
+      MachineConstantPool *ConstantPool = MF->getConstantPool();
+      Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
+      const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
+
+      // MachineConstantPool wants an explicit alignment.
+      unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
+      if (Align == 0)
+        Align = getTargetData()->getTypeAllocSize(C->getType());
+      unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
+
+      unsigned VReg1 = MRI->createVirtualRegister(TRC);
+      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
+                     .addReg(VReg1, RegState::Define)
+                     .addConstantPoolIndex(Idx)
+                     .addImm(0));
+      AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
+                     .addReg(NewVReg1)
+                     .addReg(VReg1, RegState::Kill));
+    }
+
+    BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
+      .addMBB(TrapBB)
+      .addImm(ARMCC::HI)
+      .addReg(ARM::CPSR);
+
+    unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+    AddDefaultCC(
+      AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
+                     .addReg(NewVReg1)
+                     .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
+    unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
+                   .addJumpTableIndex(MJTI)
+                   .addImm(UId));
+
+    MachineMemOperand *JTMMOLd =
+      MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
+                               MachineMemOperand::MOLoad, 4, 4);
+    unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
+    AddDefaultPred(
+      BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
+      .addReg(NewVReg3, RegState::Kill)
+      .addReg(NewVReg4)
+      .addImm(0)
+      .addMemOperand(JTMMOLd));
+
+    BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
+      .addReg(NewVReg5, RegState::Kill)
+      .addReg(NewVReg4)
+      .addJumpTableIndex(MJTI)
+      .addImm(UId);
+  }
+
+  // Add the jump table entries as successors to the MBB.
+  MachineBasicBlock *PrevMBB = 0;
+  for (std::vector<MachineBasicBlock*>::iterator
+         I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
+    MachineBasicBlock *CurMBB = *I;
+    if (PrevMBB != CurMBB)
+      DispContBB->addSuccessor(CurMBB);
+    PrevMBB = CurMBB;
+  }
+
+  // N.B. the order the invoke BBs are processed in doesn't matter here.
+  const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
+  const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
+  const unsigned *SavedRegs = RI.getCalleeSavedRegs(MF);
+  SmallVector<MachineBasicBlock*, 64> MBBLPads;
+  for (SmallPtrSet<MachineBasicBlock*, 64>::iterator
+         I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) {
+    MachineBasicBlock *BB = *I;
+
+    // Remove the landing pad successor from the invoke block and replace it
+    // with the new dispatch block.
+    SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),
+                                                  BB->succ_end());
+    while (!Successors.empty()) {
+      MachineBasicBlock *SMBB = Successors.pop_back_val();
+      if (SMBB->isLandingPad()) {
+        BB->removeSuccessor(SMBB);
+        MBBLPads.push_back(SMBB);
+      }
+    }
+
+    BB->addSuccessor(DispatchBB);
+
+    // Find the invoke call and mark all of the callee-saved registers as
+    // 'implicit defined' so that they're spilled. This prevents code from
+    // moving instructions to before the EH block, where they will never be
+    // executed.
+    for (MachineBasicBlock::reverse_iterator
+           II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
+      if (!II->getDesc().isCall()) continue;
+
+      DenseMap<unsigned, bool> DefRegs;
+      for (MachineInstr::mop_iterator
+             OI = II->operands_begin(), OE = II->operands_end();
+           OI != OE; ++OI) {
+        if (!OI->isReg()) continue;
+        DefRegs[OI->getReg()] = true;
+      }
+
+      MachineInstrBuilder MIB(&*II);
+
+      for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
+        unsigned Reg = SavedRegs[i];
+        if (Subtarget->isThumb2() &&
+            !ARM::tGPRRegisterClass->contains(Reg) &&
+            !ARM::hGPRRegisterClass->contains(Reg))
+          continue;
+        else if (Subtarget->isThumb1Only() &&
+                 !ARM::tGPRRegisterClass->contains(Reg))
+          continue;
+        else if (!Subtarget->isThumb() &&
+                 !ARM::GPRRegisterClass->contains(Reg))
+          continue;
+        if (!DefRegs[Reg])
+          MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
+      }
+
+      break;
+    }
+  }
+
+  // Mark all former landing pads as non-landing pads. The dispatch is the only
+  // landing pad now.
+  for (SmallVectorImpl<MachineBasicBlock*>::iterator
+         I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
+    (*I)->setIsLandingPad(false);
+
+  // The instruction is gone now.
+  MI->eraseFromParent();
+
+  return MBB;
+}
+
  static
  MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
    for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
@@ -5754,13 +6375,101 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
      MI->eraseFromParent();   // The pseudo instruction is gone now.
      return BB;
    }
+
+  case ARM::Int_eh_sjlj_setjmp:
+  case ARM::Int_eh_sjlj_setjmp_nofp:
+  case ARM::tInt_eh_sjlj_setjmp:
+  case ARM::t2Int_eh_sjlj_setjmp:
+  case ARM::t2Int_eh_sjlj_setjmp_nofp:
+    EmitSjLjDispatchBlock(MI, BB);
+    return BB;
+
+  case ARM::ABS:
+  case ARM::t2ABS: {
+    // To insert an ABS instruction, we have to insert the
+    // diamond control-flow pattern.  The incoming instruction knows the
+    // source vreg to test against 0, the destination vreg to set,
+    // the condition code register to branch on, the
+    // true/false values to select between, and a branch opcode to use.
+    // It transforms
+    //     V1 = ABS V0
+    // into
+    //     V2 = MOVS V0
+    //     BCC                      (branch to SinkBB if V0 >= 0)
+    //     RSBBB: V3 = RSBri V2, 0  (compute ABS if V2 < 0)
+    //     SinkBB: V1 = PHI(V2, V3)
+    const BasicBlock *LLVM_BB = BB->getBasicBlock();
+    MachineFunction::iterator BBI = BB;
+    ++BBI;
+    MachineFunction *Fn = BB->getParent();
+    MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
+    MachineBasicBlock *SinkBB  = Fn->CreateMachineBasicBlock(LLVM_BB);
+    Fn->insert(BBI, RSBBB);
+    Fn->insert(BBI, SinkBB);
+
+    unsigned int ABSSrcReg = MI->getOperand(1).getReg();
+    unsigned int ABSDstReg = MI->getOperand(0).getReg();
+    bool isThumb2 = Subtarget->isThumb2();
+    MachineRegisterInfo &MRI = Fn->getRegInfo();
+    // In Thumb mode S must not be specified if source register is the SP or
+    // PC and if destination register is the SP, so restrict register class
+    unsigned NewMovDstReg = MRI.createVirtualRegister(
+      isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
+    unsigned NewRsbDstReg = MRI.createVirtualRegister(
+      isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
+
+    // Transfer the remainder of BB and its successor edges to sinkMBB.
+    SinkBB->splice(SinkBB->begin(), BB,
+      llvm::next(MachineBasicBlock::iterator(MI)),
+      BB->end());
+    SinkBB->transferSuccessorsAndUpdatePHIs(BB);
+
+    BB->addSuccessor(RSBBB);
+    BB->addSuccessor(SinkBB);
+
+    // fall through to SinkMBB
+    RSBBB->addSuccessor(SinkBB);
+
+    // insert a movs at the end of BB
+    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr),
+      NewMovDstReg)
+      .addReg(ABSSrcReg, RegState::Kill)
+      .addImm((unsigned)ARMCC::AL).addReg(0)
+      .addReg(ARM::CPSR, RegState::Define);
+
+    // insert a bcc with opposite CC to ARMCC::MI at the end of BB
+    BuildMI(BB, dl,
+      TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
+      .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
+
+    // insert rsbri in RSBBB
+    // Note: BCC and rsbri will be converted into predicated rsbmi
+    // by if-conversion pass
+    BuildMI(*RSBBB, RSBBB->begin(), dl,
+      TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
+      .addReg(NewMovDstReg, RegState::Kill)
+      .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+
+    // insert PHI in SinkBB,
+    // reuse ABSDstReg to not change uses of ABS instruction
+    BuildMI(*SinkBB, SinkBB->begin(), dl,
+      TII->get(ARM::PHI), ABSDstReg)
+      .addReg(NewRsbDstReg).addMBB(RSBBB)
+      .addReg(NewMovDstReg).addMBB(BB);
+
+    // remove ABS instruction
+    MI->eraseFromParent();
+
+    // return last added BB
+    return SinkBB;
+  }
    }
  }
  
  void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
                                                        SDNode *Node) const {
-  const MCInstrDesc &MCID = MI->getDesc();
-  if (!MCID.hasPostISelHook()) {
+  const MCInstrDesc *MCID = &MI->getDesc();
+  if (!MCID->hasPostISelHook()) {
      assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
             "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'");
      return;
@@ -5771,20 +6480,28 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
    // operand is still set to noreg. If needed, set the optional operand's
    // register to CPSR, and remove the redundant implicit def.
    //
-  // e.g. ADCS (...opt:%noreg, CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
+  // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
  
    // Rename pseudo opcodes.
    unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
    if (NewOpc) {
      const ARMBaseInstrInfo *TII =
        static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo());
-    MI->setDesc(TII->get(NewOpc));
+    MCID = &TII->get(NewOpc);
+
+    assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 &&
+           "converted opcode should be the same except for cc_out");
+
+    MI->setDesc(*MCID);
+
+    // Add the optional cc_out operand
+    MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
    }
-  unsigned ccOutIdx = MCID.getNumOperands() - 1;
+  unsigned ccOutIdx = MCID->getNumOperands() - 1;
  
    // Any ARM instruction that sets the 's' bit should specify an optional
    // "cc_out" operand in the last operand position.
-  if (!MCID.hasOptionalDef() || !MCID.OpInfo[ccOutIdx].isOptionalDef()) {
+  if (!MCID->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
      assert(!NewOpc && "Optional cc_out operand required");
      return;
    }
@@ -5792,7 +6509,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
    // since we already have an optional CPSR def.
    bool definesCPSR = false;
    bool deadCPSR = false;
-  for (unsigned i = MCID.getNumOperands(), e = MI->getNumOperands();
+  for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands();
         i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
@@ -6379,13 +7096,14 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
      SDValue BasePtr = LD->getBasePtr();
      SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
                                   LD->getPointerInfo(), LD->isVolatile(),
-                                 LD->isNonTemporal(), LD->getAlignment());
+                                 LD->isNonTemporal(), LD->isInvariant(),
+                                 LD->getAlignment());
  
      SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
                                      DAG.getConstant(4, MVT::i32));
      SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
                                   LD->getPointerInfo(), LD->isVolatile(),
-                                 LD->isNonTemporal(),
+                                 LD->isNonTemporal(), LD->isInvariant(),
                                   std::min(4U, LD->getAlignment() / 2));
  
      DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
@@ -7454,6 +8172,41 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
    }
  }
  
+static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
+                       unsigned AlignCheck) {
+  return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
+          (DstAlign == 0 || DstAlign % AlignCheck == 0));
+}
+
+EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
+                                           unsigned DstAlign, unsigned SrcAlign,
+                                           bool IsZeroVal,
+                                           bool MemcpyStrSrc,
+                                           MachineFunction &MF) const {
+  const Function *F = MF.getFunction();
+
+  // See if we can use NEON instructions for this...
+  if (IsZeroVal &&
+      !F->hasFnAttr(Attribute::NoImplicitFloat) &&
+      Subtarget->hasNEON()) {
+    if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) {
+      return MVT::v4i32;
+    } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) {
+      return MVT::v2i32;
+    }
+  }
+
+  // Lowering to i32/i16 if the size permits.
+  if (Size >= 4) {
+    return MVT::i32;
+  } else if (Size >= 2) {
+    return MVT::i16;
+  }
+
+  // Let the target-independent logic figure it out.
+  return MVT::Other;
+}
+
  static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
    if (V < 0)
      return false;