Make TargetLowering::getPointerTy() take DataLayout as an argument

[oota-llvm.git] / lib / Target / AArch64 / AArch64ISelLowering.cpp
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp

index 6da468ed6b14d650bb6e3d88eafeaa889dfbed2a..db04623d5b7c98c49bf32cb87eabb912126d286d 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -25,6 +25,7 @@
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/IR/Function.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/Type.h"
  #include "llvm/Support/CommandLine.h"
  #include "llvm/IR/Intrinsics.h"
  #include "llvm/IR/Type.h"
  #include "llvm/Support/CommandLine.h"
@@ -64,13 +65,20 @@ EnableAArch64ExtrGeneration("aarch64-extr-generation", cl::Hidden,
  
  static cl::opt<bool>
  EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
  
  static cl::opt<bool>
  EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
-                         cl::desc("Allow AArch64 SLI/SRI formation"),
-                         cl::init(false));
+                           cl::desc("Allow AArch64 SLI/SRI formation"),
+                           cl::init(false));
  
  
+// FIXME: The necessary dtprel relocations don't seem to be supported
+// well in the GNU bfd and gold linkers at the moment. Therefore, by
+// default, for now, fall back to GeneralDynamic code generation.
+cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
+    "aarch64-elf-ldtls-generation", cl::Hidden,
+    cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
+    cl::init(false));
  
  
-AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
-    : TargetLowering(TM) {
-  Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
+                                             const AArch64Subtarget &STI)
+    : TargetLowering(TM), Subtarget(&STI) {
  
    // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
    // we have to make something up. Arbitrarily, choose ZeroOrOne.
  
    // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
    // we have to make something up. Arbitrarily, choose ZeroOrOne.
@@ -112,7 +120,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
    }
  
    // Compute derived properties from the register classes
    }
  
    // Compute derived properties from the register classes
-  computeRegisterProperties();
+  computeRegisterProperties(Subtarget->getRegisterInfo());
  
    // Provide all sorts of operation actions
    setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  
    // Provide all sorts of operation actions
    setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
@@ -274,14 +282,39 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  
-  // f16 is storage-only, so we promote operations to f32 if we know this is
-  // valid, and ignore them otherwise. The operations not mentioned here will
-  // fail to select, but this is not a major problem as no source language
-  // should be emitting native f16 operations yet.
-  setOperationAction(ISD::FADD, MVT::f16, Promote);
-  setOperationAction(ISD::FDIV, MVT::f16, Promote);
-  setOperationAction(ISD::FMUL, MVT::f16, Promote);
-  setOperationAction(ISD::FSUB, MVT::f16, Promote);
+  // f16 is a storage-only type, always promote it to f32.
+  setOperationAction(ISD::SETCC,       MVT::f16,  Promote);
+  setOperationAction(ISD::BR_CC,       MVT::f16,  Promote);
+  setOperationAction(ISD::SELECT_CC,   MVT::f16,  Promote);
+  setOperationAction(ISD::SELECT,      MVT::f16,  Promote);
+  setOperationAction(ISD::FADD,        MVT::f16,  Promote);
+  setOperationAction(ISD::FSUB,        MVT::f16,  Promote);
+  setOperationAction(ISD::FMUL,        MVT::f16,  Promote);
+  setOperationAction(ISD::FDIV,        MVT::f16,  Promote);
+  setOperationAction(ISD::FREM,        MVT::f16,  Promote);
+  setOperationAction(ISD::FMA,         MVT::f16,  Promote);
+  setOperationAction(ISD::FNEG,        MVT::f16,  Promote);
+  setOperationAction(ISD::FABS,        MVT::f16,  Promote);
+  setOperationAction(ISD::FCEIL,       MVT::f16,  Promote);
+  setOperationAction(ISD::FCOPYSIGN,   MVT::f16,  Promote);
+  setOperationAction(ISD::FCOS,        MVT::f16,  Promote);
+  setOperationAction(ISD::FFLOOR,      MVT::f16,  Promote);
+  setOperationAction(ISD::FNEARBYINT,  MVT::f16,  Promote);
+  setOperationAction(ISD::FPOW,        MVT::f16,  Promote);
+  setOperationAction(ISD::FPOWI,       MVT::f16,  Promote);
+  setOperationAction(ISD::FRINT,       MVT::f16,  Promote);
+  setOperationAction(ISD::FSIN,        MVT::f16,  Promote);
+  setOperationAction(ISD::FSINCOS,     MVT::f16,  Promote);
+  setOperationAction(ISD::FSQRT,       MVT::f16,  Promote);
+  setOperationAction(ISD::FEXP,        MVT::f16,  Promote);
+  setOperationAction(ISD::FEXP2,       MVT::f16,  Promote);
+  setOperationAction(ISD::FLOG,        MVT::f16,  Promote);
+  setOperationAction(ISD::FLOG2,       MVT::f16,  Promote);
+  setOperationAction(ISD::FLOG10,      MVT::f16,  Promote);
+  setOperationAction(ISD::FROUND,      MVT::f16,  Promote);
+  setOperationAction(ISD::FTRUNC,      MVT::f16,  Promote);
+  setOperationAction(ISD::FMINNUM,     MVT::f16,  Promote);
+  setOperationAction(ISD::FMAXNUM,     MVT::f16,  Promote);
  
    // v4f16 is also a storage-only type, so promote it to v4f32 when that is
    // known to be safe.
  
    // v4f16 is also a storage-only type, so promote it to v4f32 when that is
    // known to be safe.
@@ -363,9 +396,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
    setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
  
    // AArch64 has implementations of a lot of rounding-like FP operations.
    setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
  
    // AArch64 has implementations of a lot of rounding-like FP operations.
-  static MVT RoundingTypes[] = { MVT::f32, MVT::f64};
-  for (unsigned I = 0; I < array_lengthof(RoundingTypes); ++I) {
-    MVT Ty = RoundingTypes[I];
+  for (MVT Ty : {MVT::f32, MVT::f64}) {
      setOperationAction(ISD::FFLOOR, Ty, Legal);
      setOperationAction(ISD::FNEARBYINT, Ty, Legal);
      setOperationAction(ISD::FCEIL, Ty, Legal);
      setOperationAction(ISD::FFLOOR, Ty, Legal);
      setOperationAction(ISD::FNEARBYINT, Ty, Legal);
      setOperationAction(ISD::FCEIL, Ty, Legal);
@@ -387,13 +418,24 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
      setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
    }
  
      setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
    }
  
+  // Make floating-point constants legal for the large code model, so they don't
+  // become loads from the constant pool.
+  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
+    setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+  }
+
    // AArch64 does not have floating-point extending loads, i1 sign-extending
    // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
    // AArch64 does not have floating-point extending loads, i1 sign-extending
    // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
-  setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand);
-  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand);
+  for (MVT VT : MVT::fp_valuetypes()) {
+    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
+    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
+    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
+    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
+  }
+  for (MVT VT : MVT::integer_valuetypes())
+    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
+
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
@@ -450,6 +492,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
  
    setTargetDAGCombine(ISD::SELECT);
    setTargetDAGCombine(ISD::VSELECT);
  
    setTargetDAGCombine(ISD::SELECT);
    setTargetDAGCombine(ISD::VSELECT);
+  setTargetDAGCombine(ISD::SELECT_CC);
  
    setTargetDAGCombine(ISD::INTRINSIC_VOID);
    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
  
    setTargetDAGCombine(ISD::INTRINSIC_VOID);
    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
@@ -465,6 +508,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
  
    // Enable TBZ/TBNZ
    MaskAndBranchFoldingIsLegal = true;
  
    // Enable TBZ/TBNZ
    MaskAndBranchFoldingIsLegal = true;
+  EnableExtLdPromotion = true;
  
    setMinFunctionAlignment(2);
  
  
    setMinFunctionAlignment(2);
  
@@ -515,11 +559,21 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote);
      setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Promote);
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Promote);
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote);
      setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Promote);
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Promote);
+    // i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16
+    // -> v8f16 conversions.
+    setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Promote);
+    setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Promote);
+    setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
+    setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Promote);
      // Similarly, there is no direct i32 -> f64 vector conversion instruction.
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
      // Similarly, there is no direct i32 -> f64 vector conversion instruction.
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
+    // Nor is there a direct i32 -> f16 vector conversion.  Mark it Custom so the
+    // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
+    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
+    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
  
      // AArch64 doesn't have MUL.2d:
      setOperationAction(ISD::MUL, MVT::v2i64, Expand);
  
      // AArch64 doesn't have MUL.2d:
      setOperationAction(ISD::MUL, MVT::v2i64, Expand);
@@ -532,32 +586,26 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM)
      setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
      // Likewise, narrowing and extending vector loads/stores aren't handled
      // directly.
      setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
      // Likewise, narrowing and extending vector loads/stores aren't handled
      // directly.
-    for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
-         VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
-
-      setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,
-                         Expand);
-
-      setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand);
-      setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
-      setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand);
-      setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
-
-      setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
-
-      for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
-           InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
-        setTruncStoreAction((MVT::SimpleValueType)VT,
-                            (MVT::SimpleValueType)InnerVT, Expand);
-      setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
-      setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
-      setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+    for (MVT VT : MVT::vector_valuetypes()) {
+      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
+
+      setOperationAction(ISD::MULHS, VT, Expand);
+      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+      setOperationAction(ISD::MULHU, VT, Expand);
+      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
+      setOperationAction(ISD::BSWAP, VT, Expand);
+
+      for (MVT InnerVT : MVT::vector_valuetypes()) {
+        setTruncStoreAction(VT, InnerVT, Expand);
+        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
+        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
+        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
+      }
      }
  
      // AArch64 has implementations of a lot of rounding-like FP operations.
      }
  
      // AArch64 has implementations of a lot of rounding-like FP operations.
-    static MVT RoundingVecTypes[] = {MVT::v2f32, MVT::v4f32, MVT::v2f64 };
-    for (unsigned I = 0; I < array_lengthof(RoundingVecTypes); ++I) {
-      MVT Ty = RoundingVecTypes[I];
+    for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
        setOperationAction(ISD::FFLOOR, Ty, Legal);
        setOperationAction(ISD::FNEARBYINT, Ty, Legal);
        setOperationAction(ISD::FCEIL, Ty, Legal);
        setOperationAction(ISD::FFLOOR, Ty, Legal);
        setOperationAction(ISD::FNEARBYINT, Ty, Legal);
        setOperationAction(ISD::FCEIL, Ty, Legal);
@@ -616,7 +664,8 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
    setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::VSELECT, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::VSELECT, VT.getSimpleVT(), Expand);
-  setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
+  for (MVT InnerVT : MVT::all_valuetypes())
+    setLoadExtAction(ISD::EXTLOAD, InnerVT, VT.getSimpleVT(), Expand);
  
    // CNT supports only B element sizes.
    if (VT != MVT::v8i8 && VT != MVT::v16i8)
  
    // CNT supports only B element sizes.
    if (VT != MVT::v8i8 && VT != MVT::v16i8)
@@ -631,6 +680,12 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
  
    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
  
+  // [SU][MIN|MAX] are available for all NEON types apart from i64.
+  if (!VT.isFloatingPoint() &&
+      VT.getSimpleVT() != MVT::v2i64 && VT.getSimpleVT() != MVT::v1i64)
+    for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
+      setOperationAction(Opcode, VT.getSimpleVT(), Legal);
+
    if (Subtarget->isLittleEndian()) {
      for (unsigned im = (unsigned)ISD::PRE_INC;
           im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
    if (Subtarget->isLittleEndian()) {
      for (unsigned im = (unsigned)ISD::PRE_INC;
           im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
@@ -650,7 +705,8 @@ void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
    addTypeForNEON(VT, MVT::v4i32);
  }
  
    addTypeForNEON(VT, MVT::v4i32);
  }
  
-EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
+                                              EVT VT) const {
    if (!VT.isVector())
      return MVT::i32;
    return VT.changeVectorElementTypeToInteger();
    if (!VT.isVector())
      return MVT::i32;
    return VT.changeVectorElementTypeToInteger();
@@ -723,13 +779,6 @@ MVT AArch64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const {
    return MVT::i64;
  }
  
    return MVT::i64;
  }
  
-unsigned AArch64TargetLowering::getMaximalGlobalOffset() const {
-  // FIXME: On AArch64, this depends on the type.
-  // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes().
-  // and the offset has to be a multiple of the related size in bytes.
-  return 4095;
-}
-
  FastISel *
  AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                        const TargetLibraryInfo *libInfo) const {
  FastISel *
  AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                        const TargetLibraryInfo *libInfo) const {
@@ -737,9 +786,8 @@ AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
  }
  
  const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
  }
  
  const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
-  switch (Opcode) {
-  default:
-    return nullptr;
+  switch ((AArch64ISD::NodeType)Opcode) {
+  case AArch64ISD::FIRST_NUMBER:      break;
    case AArch64ISD::CALL:              return "AArch64ISD::CALL";
    case AArch64ISD::ADRP:              return "AArch64ISD::ADRP";
    case AArch64ISD::ADDlow:            return "AArch64ISD::ADDlow";
    case AArch64ISD::CALL:              return "AArch64ISD::CALL";
    case AArch64ISD::ADRP:              return "AArch64ISD::ADRP";
    case AArch64ISD::ADDlow:            return "AArch64ISD::ADDlow";
@@ -752,7 +800,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
    case AArch64ISD::CSNEG:             return "AArch64ISD::CSNEG";
    case AArch64ISD::CSINC:             return "AArch64ISD::CSINC";
    case AArch64ISD::THREAD_POINTER:    return "AArch64ISD::THREAD_POINTER";
    case AArch64ISD::CSNEG:             return "AArch64ISD::CSNEG";
    case AArch64ISD::CSINC:             return "AArch64ISD::CSINC";
    case AArch64ISD::THREAD_POINTER:    return "AArch64ISD::THREAD_POINTER";
-  case AArch64ISD::TLSDESC_CALL:      return "AArch64ISD::TLSDESC_CALL";
+  case AArch64ISD::TLSDESC_CALLSEQ:   return "AArch64ISD::TLSDESC_CALLSEQ";
    case AArch64ISD::ADC:               return "AArch64ISD::ADC";
    case AArch64ISD::SBC:               return "AArch64ISD::SBC";
    case AArch64ISD::ADDS:              return "AArch64ISD::ADDS";
    case AArch64ISD::ADC:               return "AArch64ISD::ADC";
    case AArch64ISD::SBC:               return "AArch64ISD::SBC";
    case AArch64ISD::ADDS:              return "AArch64ISD::ADDS";
@@ -811,6 +859,12 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
    case AArch64ISD::FCMGTz:            return "AArch64ISD::FCMGTz";
    case AArch64ISD::FCMLEz:            return "AArch64ISD::FCMLEz";
    case AArch64ISD::FCMLTz:            return "AArch64ISD::FCMLTz";
    case AArch64ISD::FCMGTz:            return "AArch64ISD::FCMGTz";
    case AArch64ISD::FCMLEz:            return "AArch64ISD::FCMLEz";
    case AArch64ISD::FCMLTz:            return "AArch64ISD::FCMLTz";
+  case AArch64ISD::SADDV:             return "AArch64ISD::SADDV";
+  case AArch64ISD::UADDV:             return "AArch64ISD::UADDV";
+  case AArch64ISD::SMINV:             return "AArch64ISD::SMINV";
+  case AArch64ISD::UMINV:             return "AArch64ISD::UMINV";
+  case AArch64ISD::SMAXV:             return "AArch64ISD::SMAXV";
+  case AArch64ISD::UMAXV:             return "AArch64ISD::UMAXV";
    case AArch64ISD::NOT:               return "AArch64ISD::NOT";
    case AArch64ISD::BIT:               return "AArch64ISD::BIT";
    case AArch64ISD::CBZ:               return "AArch64ISD::CBZ";
    case AArch64ISD::NOT:               return "AArch64ISD::NOT";
    case AArch64ISD::BIT:               return "AArch64ISD::BIT";
    case AArch64ISD::CBZ:               return "AArch64ISD::CBZ";
@@ -818,6 +872,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
    case AArch64ISD::TBZ:               return "AArch64ISD::TBZ";
    case AArch64ISD::TBNZ:              return "AArch64ISD::TBNZ";
    case AArch64ISD::TC_RETURN:         return "AArch64ISD::TC_RETURN";
    case AArch64ISD::TBZ:               return "AArch64ISD::TBZ";
    case AArch64ISD::TBNZ:              return "AArch64ISD::TBNZ";
    case AArch64ISD::TC_RETURN:         return "AArch64ISD::TC_RETURN";
+  case AArch64ISD::PREFETCH:          return "AArch64ISD::PREFETCH";
    case AArch64ISD::SITOF:             return "AArch64ISD::SITOF";
    case AArch64ISD::UITOF:             return "AArch64ISD::UITOF";
    case AArch64ISD::NVCAST:            return "AArch64ISD::NVCAST";
    case AArch64ISD::SITOF:             return "AArch64ISD::SITOF";
    case AArch64ISD::UITOF:             return "AArch64ISD::UITOF";
    case AArch64ISD::NVCAST:            return "AArch64ISD::NVCAST";
@@ -853,6 +908,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
    case AArch64ISD::SMULL:             return "AArch64ISD::SMULL";
    case AArch64ISD::UMULL:             return "AArch64ISD::UMULL";
    }
    case AArch64ISD::SMULL:             return "AArch64ISD::SMULL";
    case AArch64ISD::UMULL:             return "AArch64ISD::UMULL";
    }
+  return nullptr;
  }
  
  MachineBasicBlock *
  }
  
  MachineBasicBlock *
@@ -870,9 +926,8 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
    // EndBB:
    //     Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
  
    // EndBB:
    //     Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
  
-  const TargetInstrInfo *TII =
-      getTargetMachine().getSubtargetImpl()->getInstrInfo();
    MachineFunction *MF = MBB->getParent();
    MachineFunction *MF = MBB->getParent();
+  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
    const BasicBlock *LLVM_BB = MBB->getBasicBlock();
    DebugLoc DL = MI->getDebugLoc();
    MachineFunction::iterator It = MBB;
    const BasicBlock *LLVM_BB = MBB->getBasicBlock();
    DebugLoc DL = MI->getDebugLoc();
    MachineFunction::iterator It = MBB;
@@ -1135,7 +1190,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
               isLegalArithImmed(C - 1ULL))) {
            CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
            C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
               isLegalArithImmed(C - 1ULL))) {
            CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
            C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
-          RHS = DAG.getConstant(C, VT);
+          RHS = DAG.getConstant(C, dl, VT);
          }
          break;
        case ISD::SETULT:
          }
          break;
        case ISD::SETULT:
@@ -1145,7 +1200,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
              (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
            CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
            C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
              (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
            CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
            C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
-          RHS = DAG.getConstant(C, VT);
+          RHS = DAG.getConstant(C, dl, VT);
          }
          break;
        case ISD::SETLE:
          }
          break;
        case ISD::SETLE:
@@ -1156,7 +1211,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
               isLegalArithImmed(C + 1ULL))) {
            CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
            C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
               isLegalArithImmed(C + 1ULL))) {
            CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
            C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
-          RHS = DAG.getConstant(C, VT);
+          RHS = DAG.getConstant(C, dl, VT);
          }
          break;
        case ISD::SETULE:
          }
          break;
        case ISD::SETULE:
@@ -1167,7 +1222,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
               isLegalArithImmed(C + 1ULL))) {
            CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
            C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
               isLegalArithImmed(C + 1ULL))) {
            CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
            C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
-          RHS = DAG.getConstant(C, VT);
+          RHS = DAG.getConstant(C, dl, VT);
          }
          break;
        }
          }
          break;
        }
@@ -1201,10 +1256,11 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
                            DAG.getValueType(MVT::i16));
            Cmp = emitComparison(SExt,
                DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
                            DAG.getValueType(MVT::i16));
            Cmp = emitComparison(SExt,
-                               DAG.getConstant(ValueofRHS, RHS.getValueType()),
+                               DAG.getConstant(ValueofRHS, dl,
+                                               RHS.getValueType()),
                                 CC, dl, DAG);
            AArch64CC = changeIntCCToAArch64CC(CC);
                                 CC, dl, DAG);
            AArch64CC = changeIntCCToAArch64CC(CC);
-          AArch64cc = DAG.getConstant(AArch64CC, MVT::i32);
+          AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32);
            return Cmp;
          }
        }
            return Cmp;
          }
        }
@@ -1212,7 +1268,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
    }
    Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
    AArch64CC = changeIntCCToAArch64CC(CC);
    }
    Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
    AArch64CC = changeIntCCToAArch64CC(CC);
-  AArch64cc = DAG.getConstant(AArch64CC, MVT::i32);
+  AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32);
    return Cmp;
  }
  
    return Cmp;
  }
  
@@ -1248,7 +1304,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
    case ISD::SMULO:
    case ISD::UMULO: {
      CC = AArch64CC::NE;
    case ISD::SMULO:
    case ISD::UMULO: {
      CC = AArch64CC::NE;
-    bool IsSigned = (Op.getOpcode() == ISD::SMULO) ? true : false;
+    bool IsSigned = Op.getOpcode() == ISD::SMULO;
      if (Op.getValueType() == MVT::i32) {
        unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
        // For a 32 bit multiply with overflow check we want the instruction
      if (Op.getValueType() == MVT::i32) {
        unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
        // For a 32 bit multiply with overflow check we want the instruction
@@ -1259,7 +1315,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
        RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
        SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
        SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
        RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
        SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
        SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
-                                DAG.getConstant(0, MVT::i64));
+                                DAG.getConstant(0, DL, MVT::i64));
        // On AArch64 the upper 32 bits are always zero extended for a 32 bit
        // operation. We need to clear out the upper 32 bits, because we used a
        // widening multiply that wrote all 64 bits. In the end this should be a
        // On AArch64 the upper 32 bits are always zero extended for a 32 bit
        // operation. We need to clear out the upper 32 bits, because we used a
        // widening multiply that wrote all 64 bits. In the end this should be a
@@ -1272,10 +1328,10 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
          // check we have to arithmetic shift right the 32nd bit of the result by
          // 31 bits. Then we compare the result to the upper 32 bits.
          SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
          // check we have to arithmetic shift right the 32nd bit of the result by
          // 31 bits. Then we compare the result to the upper 32 bits.
          SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
-                                        DAG.getConstant(32, MVT::i64));
+                                        DAG.getConstant(32, DL, MVT::i64));
          UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
          SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
          UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
          SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
-                                        DAG.getConstant(31, MVT::i64));
+                                        DAG.getConstant(31, DL, MVT::i64));
          // It is important that LowerBits is last, otherwise the arithmetic
          // shift will not be folded into the compare (SUBS).
          SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
          // It is important that LowerBits is last, otherwise the arithmetic
          // shift will not be folded into the compare (SUBS).
          SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
@@ -1288,10 +1344,11 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
          // pattern:
          // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
          SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
          // pattern:
          // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
          SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
-                                        DAG.getConstant(32, MVT::i64));
+                                        DAG.getConstant(32, DL, MVT::i64));
          SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
          Overflow =
          SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
          Overflow =
-            DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64),
+            DAG.getNode(AArch64ISD::SUBS, DL, VTs,
+                        DAG.getConstant(0, DL, MVT::i64),
                          UpperBits).getValue(1);
        }
        break;
                          UpperBits).getValue(1);
        }
        break;
@@ -1302,7 +1359,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
      if (IsSigned) {
        SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
        SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
      if (IsSigned) {
        SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
        SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
-                                      DAG.getConstant(63, MVT::i64));
+                                      DAG.getConstant(63, DL, MVT::i64));
        // It is important that LowerBits is last, otherwise the arithmetic
        // shift will not be folded into the compare (SUBS).
        SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
        // It is important that LowerBits is last, otherwise the arithmetic
        // shift will not be folded into the compare (SUBS).
        SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
@@ -1312,7 +1369,8 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
        SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
        SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
        Overflow =
        SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
        SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
        Overflow =
-          DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64),
+          DAG.getNode(AArch64ISD::SUBS, DL, VTs,
+                      DAG.getConstant(0, DL, MVT::i64),
                        UpperBits).getValue(1);
      }
      break;
                        UpperBits).getValue(1);
      }
      break;
@@ -1331,10 +1389,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
  
  SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
                                               RTLIB::Libcall Call) const {
  
  SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
                                               RTLIB::Libcall Call) const {
-  SmallVector<SDValue, 2> Ops;
-  for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i)
-    Ops.push_back(Op.getOperand(i));
-
+  SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
    return makeLibCall(DAG, Call, MVT::f128, &Ops[0], Ops.size(), false,
                       SDLoc(Op)).first;
  }
    return makeLibCall(DAG, Call, MVT::f128, &Ops[0], Ops.size(), false,
                       SDLoc(Op)).first;
  }
@@ -1370,7 +1425,7 @@ static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
    ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
    ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
  
    ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
    ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
  
-  // The the values aren't constants, this isn't the pattern we're looking for.
+  // If the values aren't constants, this isn't the pattern we're looking for.
    if (!CFVal || !CTVal)
      return Op;
  
    if (!CFVal || !CTVal)
      return Op;
  
@@ -1389,7 +1444,7 @@ static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
  
      FVal = Other;
      TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
  
      FVal = Other;
      TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
-                       DAG.getConstant(-1ULL, Other.getValueType()));
+                       DAG.getConstant(-1ULL, dl, Other.getValueType()));
  
      return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
                         CCVal, Cmp);
  
      return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
                         CCVal, Cmp);
@@ -1439,24 +1494,25 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
    if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
      return SDValue();
  
    if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
      return SDValue();
  
+  SDLoc dl(Op);
    AArch64CC::CondCode CC;
    // The actual operation that sets the overflow or carry flag.
    SDValue Value, Overflow;
    std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
  
    // We use 0 and 1 as false and true values.
    AArch64CC::CondCode CC;
    // The actual operation that sets the overflow or carry flag.
    SDValue Value, Overflow;
    std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
  
    // We use 0 and 1 as false and true values.
-  SDValue TVal = DAG.getConstant(1, MVT::i32);
-  SDValue FVal = DAG.getConstant(0, MVT::i32);
+  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
+  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
  
    // We use an inverted condition, because the conditional select is inverted
    // too. This will allow it to be selected to a single instruction:
    // CSINC Wd, WZR, WZR, invert(cond).
  
    // We use an inverted condition, because the conditional select is inverted
    // too. This will allow it to be selected to a single instruction:
    // CSINC Wd, WZR, WZR, invert(cond).
-  SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), MVT::i32);
-  Overflow = DAG.getNode(AArch64ISD::CSEL, SDLoc(Op), MVT::i32, FVal, TVal,
+  SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
+  Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
                           CCVal, Overflow);
  
    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
                           CCVal, Overflow);
  
    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
-  return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow);
+  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
  }
  
  // Prefetch operands are:
  }
  
  // Prefetch operands are:
@@ -1487,7 +1543,7 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
                     (Locality << 1) |    // Cache level bits
                     (unsigned)IsStream;  // Stream bit
    return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
                     (Locality << 1) |    // Cache level bits
                     (unsigned)IsStream;  // Stream bit
    return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
-                     DAG.getConstant(PrfOp, MVT::i32), Op.getOperand(1));
+                     DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
  }
  
  SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
  }
  
  SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
@@ -1551,6 +1607,14 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
    if (Op.getOperand(0).getValueType().isVector())
      return LowerVectorFP_TO_INT(Op, DAG);
  
    if (Op.getOperand(0).getValueType().isVector())
      return LowerVectorFP_TO_INT(Op, DAG);
  
+  // f16 conversions are promoted to f32.
+  if (Op.getOperand(0).getValueType() == MVT::f16) {
+    SDLoc dl(Op);
+    return DAG.getNode(
+        Op.getOpcode(), dl, Op.getValueType(),
+        DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
+  }
+
    if (Op.getOperand(0).getValueType() != MVT::f128) {
      // It's legal except when f128 is involved
      return Op;
    if (Op.getOperand(0).getValueType() != MVT::f128) {
      // It's legal except when f128 is involved
      return Op;
@@ -1562,10 +1626,7 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
    else
      LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
  
    else
      LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
  
-  SmallVector<SDValue, 2> Ops;
-  for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i)
-    Ops.push_back(Op.getOperand(i));
-
+  SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
    return makeLibCall(DAG, LC, Op.getValueType(), &Ops[0], Ops.size(), false,
                       SDLoc(Op)).first;
  }
    return makeLibCall(DAG, LC, Op.getValueType(), &Ops[0], Ops.size(), false,
                       SDLoc(Op)).first;
  }
@@ -1584,7 +1645,7 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
          MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
                           InVT.getVectorNumElements());
      In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
          MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
                           InVT.getVectorNumElements());
      In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
-    return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0));
+    return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
    }
  
    if (VT.getSizeInBits() > InVT.getSizeInBits()) {
    }
  
    if (VT.getSizeInBits() > InVT.getSizeInBits()) {
@@ -1603,6 +1664,15 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
    if (Op.getValueType().isVector())
      return LowerVectorINT_TO_FP(Op, DAG);
  
    if (Op.getValueType().isVector())
      return LowerVectorINT_TO_FP(Op, DAG);
  
+  // f16 conversions are promoted to f32.
+  if (Op.getValueType() == MVT::f16) {
+    SDLoc dl(Op);
+    return DAG.getNode(
+        ISD::FP_ROUND, dl, MVT::f16,
+        DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
+        DAG.getIntPtrConstant(0, dl));
+  }
+
    // i128 conversions are libcalls.
    if (Op.getOperand(0).getValueType() == MVT::i128)
      return SDValue();
    // i128 conversions are libcalls.
    if (Op.getOperand(0).getValueType() == MVT::i128)
      return SDValue();
@@ -1641,7 +1711,8 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
  
    const char *LibcallName =
        (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
  
    const char *LibcallName =
        (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
-  SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
+  SDValue Callee =
+      DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
  
    StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
    TargetLowering::CallLoweringInfo CLI(DAG);
  
    StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
    TargetLowering::CallLoweringInfo CLI(DAG);
@@ -1663,7 +1734,7 @@ static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
    Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
    return SDValue(
        DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op,
    Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
    return SDValue(
        DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op,
-                         DAG.getTargetConstant(AArch64::hsub, MVT::i32)),
+                         DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
        0);
  }
  
        0);
  }
  
@@ -1708,8 +1779,7 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
    if (N->getOpcode() != ISD::BUILD_VECTOR)
      return false;
  
    if (N->getOpcode() != ISD::BUILD_VECTOR)
      return false;
  
-  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
-    SDNode *Elt = N->getOperand(i).getNode();
+  for (const SDValue &Elt : N->op_values()) {
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
        unsigned EltSize = VT.getVectorElementType().getSizeInBits();
        unsigned HalfSize = EltSize / 2;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
        unsigned EltSize = VT.getVectorElementType().getSizeInBits();
        unsigned HalfSize = EltSize / 2;
@@ -1737,6 +1807,7 @@ static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
  
    assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
    EVT VT = N->getValueType(0);
  
    assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
    EVT VT = N->getValueType(0);
+  SDLoc dl(N);
    unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
    unsigned NumElts = VT.getVectorNumElements();
    MVT TruncVT = MVT::getIntegerVT(EltSize);
    unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
    unsigned NumElts = VT.getVectorNumElements();
    MVT TruncVT = MVT::getIntegerVT(EltSize);
@@ -1746,9 +1817,9 @@ static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
      const APInt &CInt = C->getAPIntValue();
      // Element types smaller than 32 bits are not legal, so use i32 elements.
      // The values are implicitly truncated so sext vs. zext doesn't matter.
      const APInt &CInt = C->getAPIntValue();
      // Element types smaller than 32 bits are not legal, so use i32 elements.
      // The values are implicitly truncated so sext vs. zext doesn't matter.
-    Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32));
+    Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
    }
    }
-  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
+  return DAG.getNode(ISD::BUILD_VECTOR, dl,
                       MVT::getVectorVT(TruncVT, NumElts), Ops);
  }
  
                       MVT::getVectorVT(TruncVT, NumElts), Ops);
  }
  
@@ -1982,6 +2053,8 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
      llvm_unreachable("Unsupported calling convention.");
    case CallingConv::WebKit_JS:
      return CC_AArch64_WebKit_JS;
      llvm_unreachable("Unsupported calling convention.");
    case CallingConv::WebKit_JS:
      return CC_AArch64_WebKit_JS;
+  case CallingConv::GHC:
+    return CC_AArch64_GHC;
    case CallingConv::C:
    case CallingConv::Fast:
      if (!Subtarget->isTargetDarwin())
    case CallingConv::C:
    case CallingConv::Fast:
      if (!Subtarget->isTargetDarwin())
@@ -2013,18 +2086,20 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
    unsigned CurArgIdx = 0;
    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ValVT = Ins[i].VT;
    unsigned CurArgIdx = 0;
    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ValVT = Ins[i].VT;
-    std::advance(CurOrigArg, Ins[i].OrigArgIndex - CurArgIdx);
-    CurArgIdx = Ins[i].OrigArgIndex;
-
-    // Get type of the original argument.
-    EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true);
-    MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
-    // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
-    if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
-      ValVT = MVT::i8;
-    else if (ActualMVT == MVT::i16)
-      ValVT = MVT::i16;
+    if (Ins[i].isOrigArg()) {
+      std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
+      CurArgIdx = Ins[i].getOrigArgIndex();
  
  
+      // Get type of the original argument.
+      EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
+                                  /*AllowUnknown*/ true);
+      MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
+      // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
+      if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
+        ValVT = MVT::i8;
+      else if (ActualMVT == MVT::i16)
+        ValVT = MVT::i16;
+    }
      CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
      bool Res =
          AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
      CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
      bool Res =
          AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
@@ -2039,7 +2114,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
      if (Ins[i].Flags.isByVal()) {
        // Byval is used for HFAs in the PCS, but the system should work in a
        // non-compliant manner for larger structs.
      if (Ins[i].Flags.isByVal()) {
        // Byval is used for HFAs in the PCS, but the system should work in a
        // non-compliant manner for larger structs.
-      EVT PtrTy = getPointerTy();
+      EVT PtrVT = getPointerTy(DAG.getDataLayout());
        int Size = Ins[i].Flags.getByValSize();
        unsigned NumRegs = (Size + 7) / 8;
  
        int Size = Ins[i].Flags.getByValSize();
        unsigned NumRegs = (Size + 7) / 8;
  
@@ -2047,7 +2122,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
        // case. It should also work for fundamental types too.
        unsigned FrameIdx =
          MFI->CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
        // case. It should also work for fundamental types too.
        unsigned FrameIdx =
          MFI->CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
-      SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy);
+      SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
        InVals.push_back(FrameIdxN);
  
        continue;
        InVals.push_back(FrameIdxN);
  
        continue;
@@ -2107,13 +2182,14 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
        unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
  
        uint32_t BEAlign = 0;
        unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
  
        uint32_t BEAlign = 0;
-      if (ArgSize < 8 && !Subtarget->isLittleEndian())
+      if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
+          !Ins[i].Flags.isInConsecutiveRegs())
          BEAlign = 8 - ArgSize;
  
        int FI = MFI->CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
  
        // Create load nodes to retrieve arguments from the stack.
          BEAlign = 8 - ArgSize;
  
        int FI = MFI->CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
  
        // Create load nodes to retrieve arguments from the stack.
-      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
        SDValue ArgValue;
  
        // For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT)
        SDValue ArgValue;
  
        // For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT)
@@ -2192,6 +2268,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
    MachineFunction &MF = DAG.getMachineFunction();
    MachineFrameInfo *MFI = MF.getFrameInfo();
    AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
    MachineFunction &MF = DAG.getMachineFunction();
    MachineFrameInfo *MFI = MF.getFrameInfo();
    AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+  auto PtrVT = getPointerTy(DAG.getDataLayout());
  
    SmallVector<SDValue, 8> MemOps;
  
  
    SmallVector<SDValue, 8> MemOps;
  
@@ -2199,15 +2276,14 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
                                            AArch64::X3, AArch64::X4, AArch64::X5,
                                            AArch64::X6, AArch64::X7 };
    static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
                                            AArch64::X3, AArch64::X4, AArch64::X5,
                                            AArch64::X6, AArch64::X7 };
    static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
-  unsigned FirstVariadicGPR =
-      CCInfo.getFirstUnallocated(GPRArgRegs, NumGPRArgRegs);
+  unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
  
    unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
    int GPRIdx = 0;
    if (GPRSaveSize != 0) {
      GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false);
  
  
    unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
    int GPRIdx = 0;
    if (GPRSaveSize != 0) {
      GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false);
  
-    SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy());
+    SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
  
      for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
        unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
  
      for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
        unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
@@ -2216,8 +2292,8 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
            DAG.getStore(Val.getValue(1), DL, Val, FIN,
                         MachinePointerInfo::getStack(i * 8), false, false, 0);
        MemOps.push_back(Store);
            DAG.getStore(Val.getValue(1), DL, Val, FIN,
                         MachinePointerInfo::getStack(i * 8), false, false, 0);
        MemOps.push_back(Store);
-      FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
-                        DAG.getConstant(8, getPointerTy()));
+      FIN =
+          DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
      }
    }
    FuncInfo->setVarArgsGPRIndex(GPRIdx);
      }
    }
    FuncInfo->setVarArgsGPRIndex(GPRIdx);
@@ -2228,15 +2304,14 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
          AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
          AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
      static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
          AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
          AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
      static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
-    unsigned FirstVariadicFPR =
-        CCInfo.getFirstUnallocated(FPRArgRegs, NumFPRArgRegs);
+    unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
  
      unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
      int FPRIdx = 0;
      if (FPRSaveSize != 0) {
        FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
  
  
      unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
      int FPRIdx = 0;
      if (FPRSaveSize != 0) {
        FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
  
-      SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
+      SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
  
        for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
          unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
  
        for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
          unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
@@ -2246,8 +2321,8 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
              DAG.getStore(Val.getValue(1), DL, Val, FIN,
                           MachinePointerInfo::getStack(i * 16), false, false, 0);
          MemOps.push_back(Store);
              DAG.getStore(Val.getValue(1), DL, Val, FIN,
                           MachinePointerInfo::getStack(i * 16), false, false, 0);
          MemOps.push_back(Store);
-        FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
-                          DAG.getConstant(16, getPointerTy()));
+        FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
+                          DAG.getConstant(16, DL, PtrVT));
        }
      }
      FuncInfo->setVarArgsFPRIndex(FPRIdx);
        }
      }
      FuncInfo->setVarArgsFPRIndex(FPRIdx);
@@ -2350,7 +2425,9 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
    // cannot rely on the linker replacing the tail call with a return.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
      const GlobalValue *GV = G->getGlobal();
    // cannot rely on the linker replacing the tail call with a return.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
      const GlobalValue *GV = G->getGlobal();
-    if (GV->hasExternalWeakLinkage())
+    const Triple &TT = getTargetMachine().getTargetTriple();
+    if (GV->hasExternalWeakLinkage() &&
+        (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
        return false;
    }
  
        return false;
    }
  
@@ -2541,7 +2618,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
      for (unsigned i = 0; i != NumArgs; ++i) {
        MVT ValVT = Outs[i].VT;
        // Get type of the original argument.
      for (unsigned i = 0; i != NumArgs; ++i) {
        MVT ValVT = Outs[i].VT;
        // Get type of the original argument.
-      EVT ActualVT = getValueType(CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
+      EVT ActualVT = getValueType(DAG.getDataLayout(),
+                                  CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
                                    /*AllowUnknown*/ true);
        MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
        ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
                                    /*AllowUnknown*/ true);
        MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
        ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
@@ -2597,13 +2675,16 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
    // Adjust the stack pointer for the new arguments...
    // These operations are automatically eliminated by the prolog/epilog pass
    if (!IsSibCall)
    // Adjust the stack pointer for the new arguments...
    // These operations are automatically eliminated by the prolog/epilog pass
    if (!IsSibCall)
-    Chain =
-        DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), DL);
+    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, DL,
+                                                              true),
+                                 DL);
  
  
-  SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP, getPointerTy());
+  SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
+                                        getPointerTy(DAG.getDataLayout()));
  
    SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
    SmallVector<SDValue, 8> MemOpChains;
  
    SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
    SmallVector<SDValue, 8> MemOpChains;
+  auto PtrVT = getPointerTy(DAG.getDataLayout());
  
    // Walk the register/memloc assignments, inserting copies/loads.
    for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
  
    // Walk the register/memloc assignments, inserting copies/loads.
    for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
@@ -2661,20 +2742,21 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
        unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
                                          : VA.getValVT().getSizeInBits();
        OpSize = (OpSize + 7) / 8;
        unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
                                          : VA.getValVT().getSizeInBits();
        OpSize = (OpSize + 7) / 8;
-      if (!Subtarget->isLittleEndian() && !Flags.isByVal()) {
+      if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
+          !Flags.isInConsecutiveRegs()) {
          if (OpSize < 8)
            BEAlign = 8 - OpSize;
        }
        unsigned LocMemOffset = VA.getLocMemOffset();
        int32_t Offset = LocMemOffset + BEAlign;
          if (OpSize < 8)
            BEAlign = 8 - OpSize;
        }
        unsigned LocMemOffset = VA.getLocMemOffset();
        int32_t Offset = LocMemOffset + BEAlign;
-      SDValue PtrOff = DAG.getIntPtrConstant(Offset);
-      PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
+      SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
+      PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
  
        if (IsTailCall) {
          Offset = Offset + FPDiff;
          int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
  
  
        if (IsTailCall) {
          Offset = Offset + FPDiff;
          int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
  
-        DstAddr = DAG.getFrameIndex(FI, getPointerTy());
+        DstAddr = DAG.getFrameIndex(FI, PtrVT);
          DstInfo = MachinePointerInfo::getFixedStack(FI);
  
          // Make sure any stack arguments overlapping with where we're storing
          DstInfo = MachinePointerInfo::getFixedStack(FI);
  
          // Make sure any stack arguments overlapping with where we're storing
@@ -2682,19 +2764,20 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
          // clobbered.
          Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
        } else {
          // clobbered.
          Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
        } else {
-        SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+        SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
  
  
-        DstAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
+        DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
          DstInfo = MachinePointerInfo::getStack(LocMemOffset);
        }
  
        if (Outs[i].Flags.isByVal()) {
          SDValue SizeNode =
          DstInfo = MachinePointerInfo::getStack(LocMemOffset);
        }
  
        if (Outs[i].Flags.isByVal()) {
          SDValue SizeNode =
-            DAG.getConstant(Outs[i].Flags.getByValSize(), MVT::i64);
+            DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
          SDValue Cpy = DAG.getMemcpy(
              Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
          SDValue Cpy = DAG.getMemcpy(
              Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
-            /*isVol = */ false,
-            /*AlwaysInline = */ false, DstInfo, MachinePointerInfo());
+            /*isVol = */ false, /*AlwaysInline = */ false,
+            /*isTailCall = */ false,
+            DstInfo, MachinePointerInfo());
  
          MemOpChains.push_back(Cpy);
        } else {
  
          MemOpChains.push_back(Cpy);
        } else {
@@ -2733,25 +2816,24 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
        const GlobalValue *GV = G->getGlobal();
        bool InternalLinkage = GV->hasInternalLinkage();
        if (InternalLinkage)
        const GlobalValue *GV = G->getGlobal();
        bool InternalLinkage = GV->hasInternalLinkage();
        if (InternalLinkage)
-        Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0);
+        Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
        else {
        else {
-        Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0,
-                                            AArch64II::MO_GOT);
-        Callee = DAG.getNode(AArch64ISD::LOADgot, DL, getPointerTy(), Callee);
+        Callee =
+            DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
+        Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
        }
      } else if (ExternalSymbolSDNode *S =
                     dyn_cast<ExternalSymbolSDNode>(Callee)) {
        const char *Sym = S->getSymbol();
        }
      } else if (ExternalSymbolSDNode *S =
                     dyn_cast<ExternalSymbolSDNode>(Callee)) {
        const char *Sym = S->getSymbol();
-      Callee =
-          DAG.getTargetExternalSymbol(Sym, getPointerTy(), AArch64II::MO_GOT);
-      Callee = DAG.getNode(AArch64ISD::LOADgot, DL, getPointerTy(), Callee);
+      Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
+      Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
      }
    } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
      const GlobalValue *GV = G->getGlobal();
      }
    } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
      const GlobalValue *GV = G->getGlobal();
-    Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0);
+    Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
      const char *Sym = S->getSymbol();
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
      const char *Sym = S->getSymbol();
-    Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), 0);
+    Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
    }
  
    // We don't usually want to end the call-sequence here because we would tidy
    }
  
    // We don't usually want to end the call-sequence here because we would tidy
@@ -2759,8 +2841,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
    // we've carefully laid out the parameters so that when sp is reset they'll be
    // in the correct location.
    if (IsTailCall && !IsSibCall) {
    // we've carefully laid out the parameters so that when sp is reset they'll be
    // in the correct location.
    if (IsTailCall && !IsSibCall) {
-    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
-                               DAG.getIntPtrConstant(0, true), InFlag, DL);
+    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
+                               DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
      InFlag = Chain.getValue(1);
    }
  
      InFlag = Chain.getValue(1);
    }
  
@@ -2772,7 +2854,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
      // Each tail call may have to adjust the stack by a different amount, so
      // this information must travel along with the operation for eventual
      // consumption by emitEpilogue.
      // Each tail call may have to adjust the stack by a different amount, so
      // this information must travel along with the operation for eventual
      // consumption by emitEpilogue.
-    Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32));
+    Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
    }
  
    // Add argument registers to the end of the list so that they are known live
    }
  
    // Add argument registers to the end of the list so that they are known live
@@ -2783,19 +2865,16 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
  
    // Add a register mask operand representing the call-preserved registers.
    const uint32_t *Mask;
  
    // Add a register mask operand representing the call-preserved registers.
    const uint32_t *Mask;
-  const TargetRegisterInfo *TRI =
-      getTargetMachine().getSubtargetImpl()->getRegisterInfo();
-  const AArch64RegisterInfo *ARI =
-      static_cast<const AArch64RegisterInfo *>(TRI);
+  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
    if (IsThisReturn) {
      // For 'this' returns, use the X0-preserving mask if applicable
    if (IsThisReturn) {
      // For 'this' returns, use the X0-preserving mask if applicable
-    Mask = ARI->getThisReturnPreservedMask(CallConv);
+    Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
      if (!Mask) {
        IsThisReturn = false;
      if (!Mask) {
        IsThisReturn = false;
-      Mask = ARI->getCallPreservedMask(CallConv);
+      Mask = TRI->getCallPreservedMask(MF, CallConv);
      }
    } else
      }
    } else
-    Mask = ARI->getCallPreservedMask(CallConv);
+    Mask = TRI->getCallPreservedMask(MF, CallConv);
  
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
  
    assert(Mask && "Missing call preserved mask for calling convention");
    Ops.push_back(DAG.getRegisterMask(Mask));
@@ -2807,8 +2886,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
  
    // If we're doing a tail call, use a TC_RETURN here rather than an
    // actual call instruction.
  
    // If we're doing a tail call, use a TC_RETURN here rather than an
    // actual call instruction.
-  if (IsTailCall)
+  if (IsTailCall) {
+    MF.getFrameInfo()->setHasTailCall();
      return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
      return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
+  }
  
    // Returns a chain and a flag for retval copy to use.
    Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
  
    // Returns a chain and a flag for retval copy to use.
    Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
@@ -2818,8 +2899,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
                                  ? RoundUpToAlignment(NumBytes, 16)
                                  : 0;
  
                                  ? RoundUpToAlignment(NumBytes, 16)
                                  : 0;
  
-  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
-                             DAG.getIntPtrConstant(CalleePopBytes, true),
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
+                             DAG.getIntPtrConstant(CalleePopBytes, DL, true),
                               InFlag, DL);
    if (!Ins.empty())
      InFlag = Chain.getValue(1);
                               InFlag, DL);
    if (!Ins.empty())
      InFlag = Chain.getValue(1);
@@ -2902,7 +2983,7 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
  
  SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
                                                    SelectionDAG &DAG) const {
  
  SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
                                                    SelectionDAG &DAG) const {
-  EVT PtrVT = getPointerTy();
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
    SDLoc DL(Op);
    const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
    const GlobalValue *GV = GN->getGlobal();
    SDLoc DL(Op);
    const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
    const GlobalValue *GV = GN->getGlobal();
@@ -2935,7 +3016,7 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
                                       /*isInvariant=*/ true, 8);
      if (GN->getOffset() != 0)
        return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalAddr,
                                       /*isInvariant=*/ true, 8);
      if (GN->getOffset() != 0)
        return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalAddr,
-                         DAG.getConstant(GN->getOffset(), PtrVT));
+                         DAG.getConstant(GN->getOffset(), DL, PtrVT));
      return GlobalAddr;
    }
  
      return GlobalAddr;
    }
  
@@ -2994,7 +3075,7 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
    assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
  
    SDLoc DL(Op);
    assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
  
    SDLoc DL(Op);
-  MVT PtrVT = getPointerTy();
+  MVT PtrVT = getPointerTy(DAG.getDataLayout());
    const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  
    SDValue TLVPAddr =
    const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  
    SDValue TLVPAddr =
@@ -3015,11 +3096,8 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
    // TLS calls preserve all registers except those that absolutely must be
    // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
    // silly).
    // TLS calls preserve all registers except those that absolutely must be
    // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
    // silly).
-  const TargetRegisterInfo *TRI =
-      getTargetMachine().getSubtargetImpl()->getRegisterInfo();
-  const AArch64RegisterInfo *ARI =
-      static_cast<const AArch64RegisterInfo *>(TRI);
-  const uint32_t *Mask = ARI->getTLSCallPreservedMask();
+  const uint32_t *Mask =
+      Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
  
    // Finally, we can make the call. This is just a degenerate version of a
    // normal AArch64 call node: x0 takes the address of the descriptor, and
  
    // Finally, we can make the call. This is just a degenerate version of a
    // normal AArch64 call node: x0 takes the address of the descriptor, and
@@ -3035,61 +3113,34 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
  /// When accessing thread-local variables under either the general-dynamic or
  /// local-dynamic system, we make a "TLS-descriptor" call. The variable will
  /// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
  /// When accessing thread-local variables under either the general-dynamic or
  /// local-dynamic system, we make a "TLS-descriptor" call. The variable will
  /// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
-/// is a function pointer to carry out the resolution. This function takes the
-/// address of the descriptor in X0 and returns the TPIDR_EL0 offset in X0. All
-/// other registers (except LR, NZCV) are preserved.
-///
-/// Thus, the ideal call sequence on AArch64 is:
-///
-///     adrp x0, :tlsdesc:thread_var
-///     ldr x8, [x0, :tlsdesc_lo12:thread_var]
-///     add x0, x0, :tlsdesc_lo12:thread_var
-///     .tlsdesccall thread_var
-///     blr x8
-///     (TPIDR_EL0 offset now in x0).
+/// is a function pointer to carry out the resolution.
  ///
  ///
-/// The ".tlsdesccall" directive instructs the assembler to insert a particular
-/// relocation to help the linker relax this sequence if it turns out to be too
-/// conservative.
+/// The sequence is:
+///    adrp  x0, :tlsdesc:var
+///    ldr   x1, [x0, #:tlsdesc_lo12:var]
+///    add   x0, x0, #:tlsdesc_lo12:var
+///    .tlsdesccall var
+///    blr   x1
+///    (TPIDR_EL0 offset now in x0)
  ///
  ///
-/// FIXME: we currently produce an extra, duplicated, ADRP instruction, but this
-/// is harmless.
-SDValue AArch64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr,
-                                                   SDValue DescAddr, SDLoc DL,
-                                                   SelectionDAG &DAG) const {
-  EVT PtrVT = getPointerTy();
+///  The above sequence must be produced unscheduled, to enable the linker to
+///  optimize/relax this sequence.
+///  Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
+///  above sequence, and expanded really late in the compilation flow, to ensure
+///  the sequence is produced as per above.
+SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr, SDLoc DL,
+                                                      SelectionDAG &DAG) const {
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  
  
-  // The function we need to call is simply the first entry in the GOT for this
-  // descriptor, load it in preparation.
-  SDValue Func = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, SymAddr);
+  SDValue Chain = DAG.getEntryNode();
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  
  
-  // TLS calls preserve all registers except those that absolutely must be
-  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
-  // silly).
-  const TargetRegisterInfo *TRI =
-      getTargetMachine().getSubtargetImpl()->getRegisterInfo();
-  const AArch64RegisterInfo *ARI =
-      static_cast<const AArch64RegisterInfo *>(TRI);
-  const uint32_t *Mask = ARI->getTLSCallPreservedMask();
-
-  // The function takes only one argument: the address of the descriptor itself
-  // in X0.
-  SDValue Glue, Chain;
-  Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
-  Glue = Chain.getValue(1);
-
-  // We're now ready to populate the argument list, as with a normal call:
-  SmallVector<SDValue, 6> Ops;
+  SmallVector<SDValue, 2> Ops;
    Ops.push_back(Chain);
    Ops.push_back(Chain);
-  Ops.push_back(Func);
    Ops.push_back(SymAddr);
    Ops.push_back(SymAddr);
-  Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT));
-  Ops.push_back(DAG.getRegisterMask(Mask));
-  Ops.push_back(Glue);
  
  
-  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
-  Chain = DAG.getNode(AArch64ISD::TLSDESC_CALL, DL, NodeTys, Ops);
-  Glue = Chain.getValue(1);
+  Chain = DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, Ops);
+  SDValue Glue = Chain.getValue(1);
  
    return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
  }
  
    return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
  }
@@ -3100,12 +3151,21 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
    assert(Subtarget->isTargetELF() && "This function expects an ELF target");
    assert(getTargetMachine().getCodeModel() == CodeModel::Small &&
           "ELF TLS only supported in small memory model");
    assert(Subtarget->isTargetELF() && "This function expects an ELF target");
    assert(getTargetMachine().getCodeModel() == CodeModel::Small &&
           "ELF TLS only supported in small memory model");
+  // Different choices can be made for the maximum size of the TLS area for a
+  // module. For the small address model, the default TLS size is 16MiB and the
+  // maximum TLS size is 4GiB.
+  // FIXME: add -mtls-size command line option and make it control the 16MiB
+  // vs. 4GiB code sequence generation.
    const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  
    TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
    const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  
    TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
+  if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
+    if (Model == TLSModel::LocalDynamic)
+      Model = TLSModel::GeneralDynamic;
+  }
  
    SDValue TPOff;
  
    SDValue TPOff;
-  EVT PtrVT = getPointerTy();
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
    SDLoc DL(Op);
    const GlobalValue *GV = GA->getGlobal();
  
    SDLoc DL(Op);
    const GlobalValue *GV = GA->getGlobal();
  
@@ -3113,17 +3173,22 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
  
    if (Model == TLSModel::LocalExec) {
      SDValue HiVar = DAG.getTargetGlobalAddress(
  
    if (Model == TLSModel::LocalExec) {
      SDValue HiVar = DAG.getTargetGlobalAddress(
-        GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+        GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
      SDValue LoVar = DAG.getTargetGlobalAddress(
          GV, DL, PtrVT, 0,
      SDValue LoVar = DAG.getTargetGlobalAddress(
          GV, DL, PtrVT, 0,
-        AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
+        AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  
  
-    TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
-                                       DAG.getTargetConstant(16, MVT::i32)),
-                    0);
-    TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
-                                       DAG.getTargetConstant(0, MVT::i32)),
-                    0);
+    SDValue TPWithOff_lo =
+        SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
+                                   HiVar,
+                                   DAG.getTargetConstant(0, DL, MVT::i32)),
+                0);
+    SDValue TPWithOff =
+        SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
+                                   LoVar,
+                                   DAG.getTargetConstant(0, DL, MVT::i32)),
+                0);
+    return TPWithOff;
    } else if (Model == TLSModel::InitialExec) {
      TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
      TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
    } else if (Model == TLSModel::InitialExec) {
      TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
      TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
@@ -3138,19 +3203,6 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
          DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
      MFI->incNumLocalDynamicTLSAccesses();
  
          DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
      MFI->incNumLocalDynamicTLSAccesses();
  
-    // Accesses used in this sequence go via the TLS descriptor which lives in
-    // the GOT. Prepare an address we can use to handle this.
-    SDValue HiDesc = DAG.getTargetExternalSymbol(
-        "_TLS_MODULE_BASE_", PtrVT, AArch64II::MO_TLS | AArch64II::MO_PAGE);
-    SDValue LoDesc = DAG.getTargetExternalSymbol(
-        "_TLS_MODULE_BASE_", PtrVT,
-        AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
-
-    // First argument to the descriptor call is the address of the descriptor
-    // itself.
-    SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
-    DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
      // The call needs a relocation too for linker relaxation. It doesn't make
      // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
      // the address.
      // The call needs a relocation too for linker relaxation. It doesn't make
      // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
      // the address.
@@ -3159,40 +3211,23 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
  
      // Now we can calculate the offset from TPIDR_EL0 to this module's
      // thread-local area.
  
      // Now we can calculate the offset from TPIDR_EL0 to this module's
      // thread-local area.
-    TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+    TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
  
      // Now use :dtprel_whatever: operations to calculate this variable's offset
      // in its thread-storage area.
      SDValue HiVar = DAG.getTargetGlobalAddress(
  
      // Now use :dtprel_whatever: operations to calculate this variable's offset
      // in its thread-storage area.
      SDValue HiVar = DAG.getTargetGlobalAddress(
-        GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+        GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
      SDValue LoVar = DAG.getTargetGlobalAddress(
          GV, DL, MVT::i64, 0,
      SDValue LoVar = DAG.getTargetGlobalAddress(
          GV, DL, MVT::i64, 0,
-        AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
-
-    SDValue DTPOff =
-        SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
-                                   DAG.getTargetConstant(16, MVT::i32)),
-                0);
-    DTPOff =
-        SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, DTPOff, LoVar,
-                                   DAG.getTargetConstant(0, MVT::i32)),
-                0);
-
-    TPOff = DAG.getNode(ISD::ADD, DL, PtrVT, TPOff, DTPOff);
-  } else if (Model == TLSModel::GeneralDynamic) {
-    // Accesses used in this sequence go via the TLS descriptor which lives in
-    // the GOT. Prepare an address we can use to handle this.
-    SDValue HiDesc = DAG.getTargetGlobalAddress(
-        GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGE);
-    SDValue LoDesc = DAG.getTargetGlobalAddress(
-        GV, DL, PtrVT, 0,
          AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  
          AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  
-    // First argument to the descriptor call is the address of the descriptor
-    // itself.
-    SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
-    DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
+    TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
+                                       DAG.getTargetConstant(0, DL, MVT::i32)),
+                    0);
+    TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
+                                       DAG.getTargetConstant(0, DL, MVT::i32)),
+                    0);
+  } else if (Model == TLSModel::GeneralDynamic) {
      // The call needs a relocation too for linker relaxation. It doesn't make
      // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
      // the address.
      // The call needs a relocation too for linker relaxation. It doesn't make
      // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
      // the address.
@@ -3200,7 +3235,7 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
          DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
  
      // Finally we can make a call to calculate the offset from tpidr_el0.
          DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
  
      // Finally we can make a call to calculate the offset from tpidr_el0.
-    TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+    TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
    } else
      llvm_unreachable("Unsupported ELF TLS access model");
  
    } else
      llvm_unreachable("Unsupported ELF TLS access model");
  
@@ -3233,7 +3268,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
      // If softenSetCCOperands returned a scalar, we need to compare the result
      // against zero to select between true and false values.
      if (!RHS.getNode()) {
      // If softenSetCCOperands returned a scalar, we need to compare the result
      // against zero to select between true and false values.
      if (!RHS.getNode()) {
-      RHS = DAG.getConstant(0, LHS.getValueType());
+      RHS = DAG.getConstant(0, dl, LHS.getValueType());
        CC = ISD::SETNE;
      }
    }
        CC = ISD::SETNE;
      }
    }
@@ -3258,10 +3293,10 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  
      if (CC == ISD::SETNE)
        OFCC = getInvertedCondCode(OFCC);
  
      if (CC == ISD::SETNE)
        OFCC = getInvertedCondCode(OFCC);
-    SDValue CCVal = DAG.getConstant(OFCC, MVT::i32);
+    SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
  
  
-    return DAG.getNode(AArch64ISD::BRCOND, SDLoc(LHS), MVT::Other, Chain, Dest,
-                       CCVal, Overflow);
+    return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
+                       Overflow);
    }
  
    if (LHS.getValueType().isInteger()) {
    }
  
    if (LHS.getValueType().isInteger()) {
@@ -3283,7 +3318,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
            SDValue Test = LHS.getOperand(0);
            uint64_t Mask = LHS.getConstantOperandVal(1);
            return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
            SDValue Test = LHS.getOperand(0);
            uint64_t Mask = LHS.getConstantOperandVal(1);
            return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
-                             DAG.getConstant(Log2_64(Mask), MVT::i64), Dest);
+                             DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
+                             Dest);
          }
  
          return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
          }
  
          return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
@@ -3298,7 +3334,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
            SDValue Test = LHS.getOperand(0);
            uint64_t Mask = LHS.getConstantOperandVal(1);
            return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
            SDValue Test = LHS.getOperand(0);
            uint64_t Mask = LHS.getConstantOperandVal(1);
            return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
-                             DAG.getConstant(Log2_64(Mask), MVT::i64), Dest);
+                             DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
+                             Dest);
          }
  
          return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
          }
  
          return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
@@ -3308,7 +3345,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
          // becomes redundant.  This would also increase register pressure.
          uint64_t Mask = LHS.getValueType().getSizeInBits() - 1;
          return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
          // becomes redundant.  This would also increase register pressure.
          uint64_t Mask = LHS.getValueType().getSizeInBits() - 1;
          return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
-                           DAG.getConstant(Mask, MVT::i64), Dest);
+                           DAG.getConstant(Mask, dl, MVT::i64), Dest);
        }
      }
      if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
        }
      }
      if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
@@ -3318,7 +3355,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
        // becomes redundant.  This would also increase register pressure.
        uint64_t Mask = LHS.getValueType().getSizeInBits() - 1;
        return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
        // becomes redundant.  This would also increase register pressure.
        uint64_t Mask = LHS.getValueType().getSizeInBits() - 1;
        return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
-                         DAG.getConstant(Mask, MVT::i64), Dest);
+                         DAG.getConstant(Mask, dl, MVT::i64), Dest);
      }
  
      SDValue CCVal;
      }
  
      SDValue CCVal;
@@ -3334,11 +3371,11 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
    SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
    AArch64CC::CondCode CC1, CC2;
    changeFPCCToAArch64CC(CC, CC1, CC2);
    SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
    AArch64CC::CondCode CC1, CC2;
    changeFPCCToAArch64CC(CC, CC1, CC2);
-  SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
+  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
    SDValue BR1 =
        DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
    if (CC2 != AArch64CC::AL) {
    SDValue BR1 =
        DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
    if (CC2 != AArch64CC::AL) {
-    SDValue CC2Val = DAG.getConstant(CC2, MVT::i32);
+    SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
      return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
                         Cmp);
    }
      return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
                         Cmp);
    }
@@ -3358,7 +3395,8 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
      if (SrcVT == MVT::f32 && VT == MVT::f64)
        In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
      else if (SrcVT == MVT::f64 && VT == MVT::f32)
      if (SrcVT == MVT::f32 && VT == MVT::f64)
        In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
      else if (SrcVT == MVT::f64 && VT == MVT::f32)
-      In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0));
+      In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2,
+                        DAG.getIntPtrConstant(0, DL));
      else
        // FIXME: Src type is different, bail out for now. Can VT really be a
        // vector type?
      else
        // FIXME: Src type is different, bail out for now. Can VT really be a
        // vector type?
@@ -3367,11 +3405,12 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
  
    EVT VecVT;
    EVT EltVT;
  
    EVT VecVT;
    EVT EltVT;
-  SDValue EltMask, VecVal1, VecVal2;
+  uint64_t EltMask;
+  SDValue VecVal1, VecVal2;
    if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
      EltVT = MVT::i32;
      VecVT = MVT::v4i32;
    if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
      EltVT = MVT::i32;
      VecVT = MVT::v4i32;
-    EltMask = DAG.getConstant(0x80000000ULL, EltVT);
+    EltMask = 0x80000000ULL;
  
      if (!VT.isVector()) {
        VecVal1 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT,
  
      if (!VT.isVector()) {
        VecVal1 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT,
@@ -3386,10 +3425,10 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
      EltVT = MVT::i64;
      VecVT = MVT::v2i64;
  
      EltVT = MVT::i64;
      VecVT = MVT::v2i64;
  
-    // We want to materialize a mask with the the high bit set, but the AdvSIMD
+    // We want to materialize a mask with the high bit set, but the AdvSIMD
      // immediate moves cannot materialize that in a single instruction for
      // 64-bit elements. Instead, materialize zero and then negate it.
      // immediate moves cannot materialize that in a single instruction for
      // 64-bit elements. Instead, materialize zero and then negate it.
-    EltMask = DAG.getConstant(0, EltVT);
+    EltMask = 0;
  
      if (!VT.isVector()) {
        VecVal1 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT,
  
      if (!VT.isVector()) {
        VecVal1 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT,
@@ -3404,11 +3443,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
      llvm_unreachable("Invalid type for copysign!");
    }
  
      llvm_unreachable("Invalid type for copysign!");
    }
  
-  std::vector<SDValue> BuildVectorOps;
-  for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i)
-    BuildVectorOps.push_back(EltMask);
-
-  SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, BuildVectorOps);
+  SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
  
    // If we couldn't materialize the mask above, then the mask vector will be
    // the zero vector, and we need to negate it here.
  
    // If we couldn't materialize the mask above, then the mask vector will be
    // the zero vector, and we need to negate it here.
@@ -3430,8 +3465,8 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
  }
  
  SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
  }
  
  SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
-  if (DAG.getMachineFunction().getFunction()->getAttributes().hasAttribute(
-          AttributeSet::FunctionIndex, Attribute::NoImplicitFloat))
+  if (DAG.getMachineFunction().getFunction()->hasFnAttribute(
+          Attribute::NoImplicitFloat))
      return SDValue();
  
    if (!Subtarget->hasNEON())
      return SDValue();
  
    if (!Subtarget->hasNEON())
@@ -3448,21 +3483,15 @@ SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
    SDValue Val = Op.getOperand(0);
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    SDValue Val = Op.getOperand(0);
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
-  SDValue ZeroVec = DAG.getUNDEF(MVT::v8i8);
  
  
-  SDValue VecVal;
-  if (VT == MVT::i32) {
-    VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
-    VecVal = DAG.getTargetInsertSubreg(AArch64::ssub, DL, MVT::v8i8, ZeroVec,
-                                       VecVal);
-  } else {
-    VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
-  }
+  if (VT == MVT::i32)
+    Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
+  Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
  
  
-  SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, VecVal);
+  SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
    SDValue UaddLV = DAG.getNode(
        ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
    SDValue UaddLV = DAG.getNode(
        ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
-      DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, MVT::i32), CtPop);
+      DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
  
    if (VT == MVT::i64)
      UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
  
    if (VT == MVT::i64)
      UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
@@ -3481,8 +3510,8 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  
    // We chose ZeroOrOneBooleanContents, so use zero and one.
    EVT VT = Op.getValueType();
  
    // We chose ZeroOrOneBooleanContents, so use zero and one.
    EVT VT = Op.getValueType();
-  SDValue TVal = DAG.getConstant(1, VT);
-  SDValue FVal = DAG.getConstant(0, VT);
+  SDValue TVal = DAG.getConstant(1, dl, VT);
+  SDValue FVal = DAG.getConstant(0, dl, VT);
  
    // Handle f128 first, since one possible outcome is a normal integer
    // comparison which gets picked up by the next if statement.
  
    // Handle f128 first, since one possible outcome is a normal integer
    // comparison which gets picked up by the next if statement.
@@ -3519,7 +3548,7 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
    changeFPCCToAArch64CC(CC, CC1, CC2);
    if (CC2 == AArch64CC::AL) {
      changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2);
    changeFPCCToAArch64CC(CC, CC1, CC2);
    if (CC2 == AArch64CC::AL) {
      changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2);
-    SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
+    SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
  
      // Note that we inverted the condition above, so we reverse the order of
      // the true and false operands here.  This will allow the setcc to be
  
      // Note that we inverted the condition above, so we reverse the order of
      // the true and false operands here.  This will allow the setcc to be
@@ -3532,11 +3561,11 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
      // of the first as the RHS.  We're effectively OR'ing the two CC's together.
  
      // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
      // of the first as the RHS.  We're effectively OR'ing the two CC's together.
  
      // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
-    SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
+    SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
      SDValue CS1 =
          DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
  
      SDValue CS1 =
          DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
  
-    SDValue CC2Val = DAG.getConstant(CC2, MVT::i32);
+    SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
      return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
    }
  }
      return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
    }
  }
@@ -3551,7 +3580,8 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  /// operations would *not* be semantically equivalent.
  static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) {
    if (Cmp == Result)
  /// operations would *not* be semantically equivalent.
  static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) {
    if (Cmp == Result)
-    return true;
+    return (Cmp.getValueType() == MVT::f32 ||
+            Cmp.getValueType() == MVT::f64);
  
    ConstantFPSDNode *CCmp = dyn_cast<ConstantFPSDNode>(Cmp);
    ConstantFPSDNode *CResult = dyn_cast<ConstantFPSDNode>(Result);
  
    ConstantFPSDNode *CCmp = dyn_cast<ConstantFPSDNode>(Cmp);
    ConstantFPSDNode *CResult = dyn_cast<ConstantFPSDNode>(Result);
@@ -3566,49 +3596,10 @@ static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) {
    return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp;
  }
  
    return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp;
  }
  
-SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
-                                           SelectionDAG &DAG) const {
-  SDValue CC = Op->getOperand(0);
-  SDValue TVal = Op->getOperand(1);
-  SDValue FVal = Op->getOperand(2);
-  SDLoc DL(Op);
-
-  unsigned Opc = CC.getOpcode();
-  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
-  // instruction.
-  if (CC.getResNo() == 1 &&
-      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
-       Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
-    // Only lower legal XALUO ops.
-    if (!DAG.getTargetLoweringInfo().isTypeLegal(CC->getValueType(0)))
-      return SDValue();
-
-    AArch64CC::CondCode OFCC;
-    SDValue Value, Overflow;
-    std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CC.getValue(0), DAG);
-    SDValue CCVal = DAG.getConstant(OFCC, MVT::i32);
-
-    return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
-                       CCVal, Overflow);
-  }
-
-  if (CC.getOpcode() == ISD::SETCC)
-    return DAG.getSelectCC(DL, CC.getOperand(0), CC.getOperand(1), TVal, FVal,
-                           cast<CondCodeSDNode>(CC.getOperand(2))->get());
-  else
-    return DAG.getSelectCC(DL, CC, DAG.getConstant(0, CC.getValueType()), TVal,
-                           FVal, ISD::SETNE);
-}
-
-SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
+SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
+                                              SDValue RHS, SDValue TVal,
+                                              SDValue FVal, SDLoc dl,
                                                SelectionDAG &DAG) const {
                                                SelectionDAG &DAG) const {
-  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
-  SDValue LHS = Op.getOperand(0);
-  SDValue RHS = Op.getOperand(1);
-  SDValue TVal = Op.getOperand(2);
-  SDValue FVal = Op.getOperand(3);
-  SDLoc dl(Op);
-
    // Handle f128 first, because it will result in a comparison of some RTLIB
    // call result against zero.
    if (LHS.getValueType() == MVT::f128) {
    // Handle f128 first, because it will result in a comparison of some RTLIB
    // call result against zero.
    if (LHS.getValueType() == MVT::f128) {
@@ -3617,7 +3608,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
      // If softenSetCCOperands returned a scalar, we need to compare the result
      // against zero to select between true and false values.
      if (!RHS.getNode()) {
      // If softenSetCCOperands returned a scalar, we need to compare the result
      // against zero to select between true and false values.
      if (!RHS.getNode()) {
-      RHS = DAG.getConstant(0, LHS.getValueType());
+      RHS = DAG.getConstant(0, dl, LHS.getValueType());
        CC = ISD::SETNE;
      }
    }
        CC = ISD::SETNE;
      }
    }
@@ -3716,67 +3707,27 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
      SDValue CCVal;
      SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
  
      SDValue CCVal;
      SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
  
-    EVT VT = Op.getValueType();
+    EVT VT = TVal.getValueType();
      return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
    }
  
    // Now we know we're dealing with FP values.
    assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
    assert(LHS.getValueType() == RHS.getValueType());
      return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
    }
  
    // Now we know we're dealing with FP values.
    assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
    assert(LHS.getValueType() == RHS.getValueType());
-  EVT VT = Op.getValueType();
-
-  // Try to match this select into a max/min operation, which have dedicated
-  // opcode in the instruction set.
-  // FIXME: This is not correct in the presence of NaNs, so we only enable this
-  // in no-NaNs mode.
-  if (getTargetMachine().Options.NoNaNsFPMath) {
-    SDValue MinMaxLHS = TVal, MinMaxRHS = FVal;
-    if (selectCCOpsAreFMaxCompatible(LHS, MinMaxRHS) &&
-        selectCCOpsAreFMaxCompatible(RHS, MinMaxLHS)) {
-      CC = ISD::getSetCCSwappedOperands(CC);
-      std::swap(MinMaxLHS, MinMaxRHS);
-    }
-
-    if (selectCCOpsAreFMaxCompatible(LHS, MinMaxLHS) &&
-        selectCCOpsAreFMaxCompatible(RHS, MinMaxRHS)) {
-      switch (CC) {
-      default:
-        break;
-      case ISD::SETGT:
-      case ISD::SETGE:
-      case ISD::SETUGT:
-      case ISD::SETUGE:
-      case ISD::SETOGT:
-      case ISD::SETOGE:
-        return DAG.getNode(AArch64ISD::FMAX, dl, VT, MinMaxLHS, MinMaxRHS);
-        break;
-      case ISD::SETLT:
-      case ISD::SETLE:
-      case ISD::SETULT:
-      case ISD::SETULE:
-      case ISD::SETOLT:
-      case ISD::SETOLE:
-        return DAG.getNode(AArch64ISD::FMIN, dl, VT, MinMaxLHS, MinMaxRHS);
-        break;
-      }
-    }
-  }
-
-  // If that fails, we'll need to perform an FCMP + CSEL sequence.  Go ahead
-  // and do the comparison.
+  EVT VT = TVal.getValueType();
    SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
  
    // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
    // clean.  Some of them require two CSELs to implement.
    AArch64CC::CondCode CC1, CC2;
    changeFPCCToAArch64CC(CC, CC1, CC2);
    SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
  
    // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
    // clean.  Some of them require two CSELs to implement.
    AArch64CC::CondCode CC1, CC2;
    changeFPCCToAArch64CC(CC, CC1, CC2);
-  SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
+  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
    SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
  
    // If we need a second CSEL, emit it, using the output of the first as the
    // RHS.  We're effectively OR'ing the two CC's together.
    if (CC2 != AArch64CC::AL) {
    SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
  
    // If we need a second CSEL, emit it, using the output of the first as the
    // RHS.  We're effectively OR'ing the two CC's together.
    if (CC2 != AArch64CC::AL) {
-    SDValue CC2Val = DAG.getConstant(CC2, MVT::i32);
+    SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
      return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
    }
  
      return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
    }
  
@@ -3784,12 +3735,64 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
    return CS1;
  }
  
    return CS1;
  }
  
+SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  SDValue TVal = Op.getOperand(2);
+  SDValue FVal = Op.getOperand(3);
+  SDLoc DL(Op);
+  return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
+}
+
+SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
+                                           SelectionDAG &DAG) const {
+  SDValue CCVal = Op->getOperand(0);
+  SDValue TVal = Op->getOperand(1);
+  SDValue FVal = Op->getOperand(2);
+  SDLoc DL(Op);
+
+  unsigned Opc = CCVal.getOpcode();
+  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
+  // instruction.
+  if (CCVal.getResNo() == 1 &&
+      (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
+       Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
+    // Only lower legal XALUO ops.
+    if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
+      return SDValue();
+
+    AArch64CC::CondCode OFCC;
+    SDValue Value, Overflow;
+    std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
+    SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
+
+    return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
+                       CCVal, Overflow);
+  }
+
+  // Lower it the same way as we would lower a SELECT_CC node.
+  ISD::CondCode CC;
+  SDValue LHS, RHS;
+  if (CCVal.getOpcode() == ISD::SETCC) {
+    LHS = CCVal.getOperand(0);
+    RHS = CCVal.getOperand(1);
+    CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get();
+  } else {
+    LHS = CCVal;
+    RHS = DAG.getConstant(0, DL, CCVal.getValueType());
+    CC = ISD::SETNE;
+  }
+  return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
+}
+
  SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
                                                SelectionDAG &DAG) const {
    // Jump table entries as PC relative offsets. No additional tweaking
    // is necessary here. Just get the address of the jump table.
    JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
                                                SelectionDAG &DAG) const {
    // Jump table entries as PC relative offsets. No additional tweaking
    // is necessary here. Just get the address of the jump table.
    JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
-  EVT PtrVT = getPointerTy();
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
    SDLoc DL(Op);
  
    if (getTargetMachine().getCodeModel() == CodeModel::Large &&
    SDLoc DL(Op);
  
    if (getTargetMachine().getCodeModel() == CodeModel::Large &&
@@ -3815,7 +3818,7 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
  SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
                                                   SelectionDAG &DAG) const {
    ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
                                                   SelectionDAG &DAG) const {
    ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
-  EVT PtrVT = getPointerTy();
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
    SDLoc DL(Op);
  
    if (getTargetMachine().getCodeModel() == CodeModel::Large) {
    SDLoc DL(Op);
  
    if (getTargetMachine().getCodeModel() == CodeModel::Large) {
@@ -3856,7 +3859,7 @@ SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
  SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {
    const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
  SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {
    const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
-  EVT PtrVT = getPointerTy();
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
    SDLoc DL(Op);
    if (getTargetMachine().getCodeModel() == CodeModel::Large &&
        !Subtarget->isTargetMachO()) {
    SDLoc DL(Op);
    if (getTargetMachine().getCodeModel() == CodeModel::Large &&
        !Subtarget->isTargetMachO()) {
@@ -3882,8 +3885,8 @@ SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
        DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
  
    SDLoc DL(Op);
        DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
  
    SDLoc DL(Op);
-  SDValue FR =
-      DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy());
+  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
+                                 getPointerTy(DAG.getDataLayout()));
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
                        MachinePointerInfo(SV), false, false, 0);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
                        MachinePointerInfo(SV), false, false, 0);
@@ -3895,6 +3898,7 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
    // Standard, section B.3.
    MachineFunction &MF = DAG.getMachineFunction();
    AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
    // Standard, section B.3.
    MachineFunction &MF = DAG.getMachineFunction();
    AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+  auto PtrVT = getPointerTy(DAG.getDataLayout());
    SDLoc DL(Op);
  
    SDValue Chain = Op.getOperand(0);
    SDLoc DL(Op);
  
    SDValue Chain = Op.getOperand(0);
@@ -3903,8 +3907,7 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
    SmallVector<SDValue, 4> MemOps;
  
    // void *__stack at offset 0
    SmallVector<SDValue, 4> MemOps;
  
    // void *__stack at offset 0
-  SDValue Stack =
-      DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy());
+  SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
    MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
                                  MachinePointerInfo(SV), false, false, 8));
  
    MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
                                  MachinePointerInfo(SV), false, false, 8));
  
@@ -3913,12 +3916,12 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
    if (GPRSize > 0) {
      SDValue GRTop, GRTopAddr;
  
    if (GPRSize > 0) {
      SDValue GRTop, GRTopAddr;
  
-    GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
-                            DAG.getConstant(8, getPointerTy()));
+    GRTopAddr =
+        DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(8, DL, PtrVT));
  
  
-    GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), getPointerTy());
-    GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop,
-                        DAG.getConstant(GPRSize, getPointerTy()));
+    GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
+    GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
+                        DAG.getConstant(GPRSize, DL, PtrVT));
  
      MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
                                    MachinePointerInfo(SV, 8), false, false, 8));
  
      MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
                                    MachinePointerInfo(SV, 8), false, false, 8));
@@ -3928,28 +3931,30 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
    int FPRSize = FuncInfo->getVarArgsFPRSize();
    if (FPRSize > 0) {
      SDValue VRTop, VRTopAddr;
    int FPRSize = FuncInfo->getVarArgsFPRSize();
    if (FPRSize > 0) {
      SDValue VRTop, VRTopAddr;
-    VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
-                            DAG.getConstant(16, getPointerTy()));
+    VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+                            DAG.getConstant(16, DL, PtrVT));
  
  
-    VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), getPointerTy());
-    VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop,
-                        DAG.getConstant(FPRSize, getPointerTy()));
+    VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
+    VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
+                        DAG.getConstant(FPRSize, DL, PtrVT));
  
      MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
                                    MachinePointerInfo(SV, 16), false, false, 8));
    }
  
    // int __gr_offs at offset 24
  
      MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
                                    MachinePointerInfo(SV, 16), false, false, 8));
    }
  
    // int __gr_offs at offset 24
-  SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
-                                   DAG.getConstant(24, getPointerTy()));
-  MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32),
+  SDValue GROffsAddr =
+      DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(24, DL, PtrVT));
+  MemOps.push_back(DAG.getStore(Chain, DL,
+                                DAG.getConstant(-GPRSize, DL, MVT::i32),
                                  GROffsAddr, MachinePointerInfo(SV, 24), false,
                                  false, 4));
  
    // int __vr_offs at offset 28
                                  GROffsAddr, MachinePointerInfo(SV, 24), false,
                                  false, 4));
  
    // int __vr_offs at offset 28
-  SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
-                                   DAG.getConstant(28, getPointerTy()));
-  MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32),
+  SDValue VROffsAddr =
+      DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(28, DL, PtrVT));
+  MemOps.push_back(DAG.getStore(Chain, DL,
+                                DAG.getConstant(-FPRSize, DL, MVT::i32),
                                  VROffsAddr, MachinePointerInfo(SV, 28), false,
                                  false, 4));
  
                                  VROffsAddr, MachinePointerInfo(SV, 28), false,
                                  false, 4));
  
@@ -3966,13 +3971,15 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
                                             SelectionDAG &DAG) const {
    // AAPCS has three pointers and two ints (= 32 bytes), Darwin has single
    // pointer.
                                             SelectionDAG &DAG) const {
    // AAPCS has three pointers and two ints (= 32 bytes), Darwin has single
    // pointer.
+  SDLoc DL(Op);
    unsigned VaListSize = Subtarget->isTargetDarwin() ? 8 : 32;
    const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
    const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  
    unsigned VaListSize = Subtarget->isTargetDarwin() ? 8 : 32;
    const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
    const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  
-  return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op), Op.getOperand(1),
-                       Op.getOperand(2), DAG.getConstant(VaListSize, MVT::i32),
-                       8, false, false, MachinePointerInfo(DestSV),
+  return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1),
+                       Op.getOperand(2),
+                       DAG.getConstant(VaListSize, DL, MVT::i32),
+                       8, false, false, false, MachinePointerInfo(DestSV),
                         MachinePointerInfo(SrcSV));
  }
  
                         MachinePointerInfo(SrcSV));
  }
  
@@ -3986,21 +3993,22 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
    SDValue Chain = Op.getOperand(0);
    SDValue Addr = Op.getOperand(1);
    unsigned Align = Op.getConstantOperandVal(3);
    SDValue Chain = Op.getOperand(0);
    SDValue Addr = Op.getOperand(1);
    unsigned Align = Op.getConstantOperandVal(3);
+  auto PtrVT = getPointerTy(DAG.getDataLayout());
  
  
-  SDValue VAList = DAG.getLoad(getPointerTy(), DL, Chain, Addr,
-                               MachinePointerInfo(V), false, false, false, 0);
+  SDValue VAList = DAG.getLoad(PtrVT, DL, Chain, Addr, MachinePointerInfo(V),
+                               false, false, false, 0);
    Chain = VAList.getValue(1);
  
    if (Align > 8) {
      assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
    Chain = VAList.getValue(1);
  
    if (Align > 8) {
      assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
-    VAList = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
-                         DAG.getConstant(Align - 1, getPointerTy()));
-    VAList = DAG.getNode(ISD::AND, DL, getPointerTy(), VAList,
-                         DAG.getConstant(-(int64_t)Align, getPointerTy()));
+    VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+                         DAG.getConstant(Align - 1, DL, PtrVT));
+    VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
+                         DAG.getConstant(-(int64_t)Align, DL, PtrVT));
    }
  
    Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
    }
  
    Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
-  uint64_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy);
+  uint64_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
  
    // Scalar integer and FP values smaller than 64 bits are implicitly extended
    // up to 64 bits.  At the very least, we have to increase the striding of the
  
    // Scalar integer and FP values smaller than 64 bits are implicitly extended
    // up to 64 bits.  At the very least, we have to increase the striding of the
@@ -4015,8 +4023,8 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
    }
  
    // Increment the pointer, VAList, to the next vaarg
    }
  
    // Increment the pointer, VAList, to the next vaarg
-  SDValue VANext = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
-                               DAG.getConstant(ArgSize, getPointerTy()));
+  SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
+                               DAG.getConstant(ArgSize, DL, PtrVT));
    // Store the incremented VAList to the legalized pointer
    SDValue APStore = DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V),
                                   false, false, 0);
    // Store the incremented VAList to the legalized pointer
    SDValue APStore = DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V),
                                   false, false, 0);
@@ -4028,7 +4036,7 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
                                   MachinePointerInfo(), false, false, false, 0);
      // Round the value down to an f32.
      SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
                                   MachinePointerInfo(), false, false, false, 0);
      // Round the value down to an f32.
      SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
-                                   DAG.getIntPtrConstant(1));
+                                   DAG.getIntPtrConstant(1, DL));
      SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
      // Merge the rounded value with the chain output of the load.
      return DAG.getMergeValues(Ops, DL);
      SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
      // Merge the rounded value with the chain output of the load.
      return DAG.getMergeValues(Ops, DL);
@@ -4063,7 +4071,8 @@ unsigned AArch64TargetLowering::getRegisterByName(const char* RegName,
                         .Default(0);
    if (Reg)
      return Reg;
                         .Default(0);
    if (Reg)
      return Reg;
-  report_fatal_error("Invalid register name global variable");
+  report_fatal_error(Twine("Invalid register name \""
+                              + StringRef(RegName)  + "\"."));
  }
  
  SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
  }
  
  SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
@@ -4077,7 +4086,7 @@ SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
    unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    if (Depth) {
      SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    if (Depth) {
      SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
-    SDValue Offset = DAG.getConstant(8, getPointerTy());
+    SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
      return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                         DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
                         MachinePointerInfo(), false, false, false, 0);
      return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                         DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
                         MachinePointerInfo(), false, false, false, 0);
@@ -4105,15 +4114,15 @@ SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
    assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
  
    SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
    assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
  
    SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
-                                 DAG.getConstant(VTBits, MVT::i64), ShAmt);
+                                 DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
    SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
    SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
    SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
    SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
-                                   DAG.getConstant(VTBits, MVT::i64));
+                                   DAG.getConstant(VTBits, dl, MVT::i64));
    SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
  
    SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
  
-  SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64),
+  SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64),
                                 ISD::SETGE, dl, DAG);
                                 ISD::SETGE, dl, DAG);
-  SDValue CCVal = DAG.getConstant(AArch64CC::GE, MVT::i32);
+  SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
  
    SDValue FalseValLo = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
    SDValue TrueValLo = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
  
    SDValue FalseValLo = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
    SDValue TrueValLo = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
@@ -4125,8 +4134,9 @@ SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
    SDValue FalseValHi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
    SDValue TrueValHi = Opc == ISD::SRA
                            ? DAG.getNode(Opc, dl, VT, ShOpHi,
    SDValue FalseValHi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
    SDValue TrueValHi = Opc == ISD::SRA
                            ? DAG.getNode(Opc, dl, VT, ShOpHi,
-                                        DAG.getConstant(VTBits - 1, MVT::i64))
-                          : DAG.getConstant(0, VT);
+                                        DAG.getConstant(VTBits - 1, dl,
+                                                        MVT::i64))
+                          : DAG.getConstant(0, dl, VT);
    SDValue Hi =
        DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValHi, FalseValHi, CCVal, Cmp);
  
    SDValue Hi =
        DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValHi, FalseValHi, CCVal, Cmp);
  
@@ -4149,24 +4159,24 @@ SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
  
    assert(Op.getOpcode() == ISD::SHL_PARTS);
    SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
  
    assert(Op.getOpcode() == ISD::SHL_PARTS);
    SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
-                                 DAG.getConstant(VTBits, MVT::i64), ShAmt);
+                                 DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
    SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
    SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
    SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
    SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
-                                   DAG.getConstant(VTBits, MVT::i64));
+                                   DAG.getConstant(VTBits, dl, MVT::i64));
    SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
    SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
  
    SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
  
    SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
    SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
  
    SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
  
-  SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64),
+  SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64),
                                 ISD::SETGE, dl, DAG);
                                 ISD::SETGE, dl, DAG);
-  SDValue CCVal = DAG.getConstant(AArch64CC::GE, MVT::i32);
+  SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
    SDValue Hi =
        DAG.getNode(AArch64ISD::CSEL, dl, VT, Tmp3, FalseVal, CCVal, Cmp);
  
    // AArch64 shifts of larger than register sizes are wrapped rather than
    // clamped, so we can't just emit "lo << a" if a is too big.
    SDValue Hi =
        DAG.getNode(AArch64ISD::CSEL, dl, VT, Tmp3, FalseVal, CCVal, Cmp);
  
    // AArch64 shifts of larger than register sizes are wrapped rather than
    // clamped, so we can't just emit "lo << a" if a is too big.
-  SDValue TrueValLo = DAG.getConstant(0, VT);
+  SDValue TrueValLo = DAG.getConstant(0, dl, VT);
    SDValue FalseValLo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
    SDValue Lo =
        DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp);
    SDValue FalseValLo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
    SDValue Lo =
        DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp);
@@ -4229,7 +4239,7 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  /// getConstraintType - Given a constraint letter, return the type of
  /// constraint it is for this target.
  AArch64TargetLowering::ConstraintType
  /// getConstraintType - Given a constraint letter, return the type of
  /// constraint it is for this target.
  AArch64TargetLowering::ConstraintType
-AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
+AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
    if (Constraint.size() == 1) {
      switch (Constraint[0]) {
      default:
    if (Constraint.size() == 1) {
      switch (Constraint[0]) {
      default:
@@ -4280,7 +4290,7 @@ AArch64TargetLowering::getSingleConstraintMatchWeight(
  
  std::pair<unsigned, const TargetRegisterClass *>
  AArch64TargetLowering::getRegForInlineAsmConstraint(
  
  std::pair<unsigned, const TargetRegisterClass *>
  AArch64TargetLowering::getRegForInlineAsmConstraint(
-    const std::string &Constraint, MVT VT) const {
+    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
    if (Constraint.size() == 1) {
      switch (Constraint[0]) {
      case 'r':
    if (Constraint.size() == 1) {
      switch (Constraint[0]) {
      case 'r':
@@ -4309,17 +4319,16 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
    // Use the default implementation in TargetLowering to convert the register
    // constraint into a member of a register class.
    std::pair<unsigned, const TargetRegisterClass *> Res;
    // Use the default implementation in TargetLowering to convert the register
    // constraint into a member of a register class.
    std::pair<unsigned, const TargetRegisterClass *> Res;
-  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+  Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
  
    // Not found as a standard register?
    if (!Res.second) {
      unsigned Size = Constraint.size();
      if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
          tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
  
    // Not found as a standard register?
    if (!Res.second) {
      unsigned Size = Constraint.size();
      if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
          tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
-      const std::string Reg =
-          std::string(&Constraint[2], &Constraint[Size - 1]);
-      int RegNo = atoi(Reg.c_str());
-      if (RegNo >= 0 && RegNo <= 31) {
+      int RegNo;
+      bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
+      if (!Failed && RegNo >= 0 && RegNo <= 31) {
          // v0 - v31 are aliases of q0 - q31.
          // By default we'll emit v0-v31 for this unless there's a modifier where
          // we'll emit the correct register as well.
          // v0 - v31 are aliases of q0 - q31.
          // By default we'll emit v0-v31 for this unless there's a modifier where
          // we'll emit the correct register as well.
@@ -4458,7 +4467,7 @@ void AArch64TargetLowering::LowerAsmOperandForConstraint(
      }
  
      // All assembler immediates are 64-bit integers.
      }
  
      // All assembler immediates are 64-bit integers.
-    Result = DAG.getTargetConstant(CVal, MVT::i64);
+    Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
      break;
    }
  
      break;
    }
  
@@ -4484,7 +4493,7 @@ static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
    SDLoc DL(V64Reg);
  
    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
    SDLoc DL(V64Reg);
  
    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
-                     V64Reg, DAG.getConstant(0, MVT::i32));
+                     V64Reg, DAG.getConstant(0, DL, MVT::i32));
  }
  
  /// getExtFactor - Determine the adjustment factor for the position when
  }
  
  /// getExtFactor - Determine the adjustment factor for the position when
@@ -4616,23 +4625,26 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
        // The extraction can just take the second half
        Src.ShuffleVec =
            DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
        // The extraction can just take the second half
        Src.ShuffleVec =
            DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
-                      DAG.getIntPtrConstant(NumSrcElts));
+                      DAG.getConstant(NumSrcElts, dl, MVT::i64));
        Src.WindowBase = -NumSrcElts;
      } else if (Src.MaxElt < NumSrcElts) {
        // The extraction can just take the first half
        Src.WindowBase = -NumSrcElts;
      } else if (Src.MaxElt < NumSrcElts) {
        // The extraction can just take the first half
-      Src.ShuffleVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
-                                   Src.ShuffleVec, DAG.getIntPtrConstant(0));
+      Src.ShuffleVec =
+          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+                      DAG.getConstant(0, dl, MVT::i64));
      } else {
        // An actual VEXT is needed
      } else {
        // An actual VEXT is needed
-      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
-                                     Src.ShuffleVec, DAG.getIntPtrConstant(0));
+      SDValue VEXTSrc1 =
+          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+                      DAG.getConstant(0, dl, MVT::i64));
        SDValue VEXTSrc2 =
            DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
        SDValue VEXTSrc2 =
            DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
-                      DAG.getIntPtrConstant(NumSrcElts));
+                      DAG.getConstant(NumSrcElts, dl, MVT::i64));
        unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
  
        Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
        unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
  
        Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
-                                   VEXTSrc2, DAG.getConstant(Imm, MVT::i32));
+                                   VEXTSrc2,
+                                   DAG.getConstant(Imm, dl, MVT::i32));
        Src.WindowBase = -Src.MinElt;
      }
    }
        Src.WindowBase = -Src.MinElt;
      }
    }
@@ -4967,11 +4979,11 @@ static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
                                  VT.getVectorNumElements() / 2);
    if (SplitV0) {
      V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
                                  VT.getVectorNumElements() / 2);
    if (SplitV0) {
      V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
-                     DAG.getConstant(0, MVT::i64));
+                     DAG.getConstant(0, DL, MVT::i64));
    }
    if (V1.getValueType().getSizeInBits() == 128) {
      V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
    }
    if (V1.getValueType().getSizeInBits() == 128) {
      V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
-                     DAG.getConstant(0, MVT::i64));
+                     DAG.getConstant(0, DL, MVT::i64));
    }
    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
  }
    }
    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
  }
@@ -5038,7 +5050,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
      unsigned Opcode;
      if (EltTy == MVT::i8)
        Opcode = AArch64ISD::DUPLANE8;
      unsigned Opcode;
      if (EltTy == MVT::i8)
        Opcode = AArch64ISD::DUPLANE8;
-    else if (EltTy == MVT::i16)
+    else if (EltTy == MVT::i16 || EltTy == MVT::f16)
        Opcode = AArch64ISD::DUPLANE16;
      else if (EltTy == MVT::i32 || EltTy == MVT::f32)
        Opcode = AArch64ISD::DUPLANE32;
        Opcode = AArch64ISD::DUPLANE16;
      else if (EltTy == MVT::i32 || EltTy == MVT::f32)
        Opcode = AArch64ISD::DUPLANE32;
@@ -5049,7 +5061,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
  
      if (VT.getSizeInBits() == 64)
        OpLHS = WidenVector(OpLHS, DAG);
  
      if (VT.getSizeInBits() == 64)
        OpLHS = WidenVector(OpLHS, DAG);
-    SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, MVT::i64);
+    SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
      return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
    }
    case OP_VEXT1:
      return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
    }
    case OP_VEXT1:
@@ -5057,7 +5069,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
    case OP_VEXT3: {
      unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
      return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
    case OP_VEXT3: {
      unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
      return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
-                       DAG.getConstant(Imm, MVT::i32));
+                       DAG.getConstant(Imm, dl, MVT::i32));
    }
    case OP_VUZPL:
      return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
    }
    case OP_VUZPL:
      return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
@@ -5094,7 +5106,7 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
    for (int Val : ShuffleMask) {
      for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
        unsigned Offset = Byte + Val * BytesPerElt;
    for (int Val : ShuffleMask) {
      for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
        unsigned Offset = Byte + Val * BytesPerElt;
-      TBLMask.push_back(DAG.getConstant(Offset, MVT::i32));
+      TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
      }
    }
  
      }
    }
  
@@ -5114,7 +5126,7 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
        V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
      Shuffle = DAG.getNode(
          ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
        V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
      Shuffle = DAG.getNode(
          ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
-        DAG.getConstant(Intrinsic::aarch64_neon_tbl1, MVT::i32), V1Cst,
+        DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
          DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
                      makeArrayRef(TBLMask.data(), IndexLen)));
    } else {
          DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
                      makeArrayRef(TBLMask.data(), IndexLen)));
    } else {
@@ -5122,7 +5134,7 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
        V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
        Shuffle = DAG.getNode(
            ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
        V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
        Shuffle = DAG.getNode(
            ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
-          DAG.getConstant(Intrinsic::aarch64_neon_tbl1, MVT::i32), V1Cst,
+          DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
            DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
                        makeArrayRef(TBLMask.data(), IndexLen)));
      } else {
            DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
                        makeArrayRef(TBLMask.data(), IndexLen)));
      } else {
@@ -5134,7 +5146,8 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
        //                               &TBLMask[0], IndexLen));
        Shuffle = DAG.getNode(
            ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
        //                               &TBLMask[0], IndexLen));
        Shuffle = DAG.getNode(
            ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
-          DAG.getConstant(Intrinsic::aarch64_neon_tbl2, MVT::i32), V1Cst, V2Cst,
+          DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32),
+          V1Cst, V2Cst,
            DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
                        makeArrayRef(TBLMask.data(), IndexLen)));
      }
            DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
                        makeArrayRef(TBLMask.data(), IndexLen)));
      }
@@ -5203,7 +5216,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
      } else if (VT.getSizeInBits() == 64)
        V1 = WidenVector(V1, DAG);
  
      } else if (VT.getSizeInBits() == 64)
        V1 = WidenVector(V1, DAG);
  
-    return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, MVT::i64));
+    return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, dl, MVT::i64));
    }
  
    if (isREVMask(ShuffleMask, VT, 64))
    }
  
    if (isREVMask(ShuffleMask, VT, 64))
@@ -5220,12 +5233,12 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
        std::swap(V1, V2);
      Imm *= getExtFactor(V1);
      return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
        std::swap(V1, V2);
      Imm *= getExtFactor(V1);
      return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
-                       DAG.getConstant(Imm, MVT::i32));
+                       DAG.getConstant(Imm, dl, MVT::i32));
    } else if (V2->getOpcode() == ISD::UNDEF &&
               isSingletonEXTMask(ShuffleMask, VT, Imm)) {
      Imm *= getExtFactor(V1);
      return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
    } else if (V2->getOpcode() == ISD::UNDEF &&
               isSingletonEXTMask(ShuffleMask, VT, Imm)) {
      Imm *= getExtFactor(V1);
      return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
-                       DAG.getConstant(Imm, MVT::i32));
+                       DAG.getConstant(Imm, dl, MVT::i32));
    }
  
    unsigned WhichResult;
    }
  
    unsigned WhichResult;
@@ -5264,7 +5277,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
    int NumInputElements = V1.getValueType().getVectorNumElements();
    if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
      SDValue DstVec = DstIsLeft ? V1 : V2;
    int NumInputElements = V1.getValueType().getVectorNumElements();
    if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
      SDValue DstVec = DstIsLeft ? V1 : V2;
-    SDValue DstLaneV = DAG.getConstant(Anomaly, MVT::i64);
+    SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64);
  
      SDValue SrcVec = V1;
      int SrcLane = ShuffleMask[Anomaly];
  
      SDValue SrcVec = V1;
      int SrcLane = ShuffleMask[Anomaly];
@@ -5272,7 +5285,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
        SrcVec = V2;
        SrcLane -= VT.getVectorNumElements();
      }
        SrcVec = V2;
        SrcLane -= VT.getVectorNumElements();
      }
-    SDValue SrcLaneV = DAG.getConstant(SrcLane, MVT::i64);
+    SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64);
  
      EVT ScalarVT = VT.getVectorElementType();
  
  
      EVT ScalarVT = VT.getVectorElementType();
  
@@ -5362,8 +5375,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(0, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(0, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5371,8 +5384,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(8, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(8, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5380,8 +5393,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(16, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(16, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5389,8 +5402,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(24, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(24, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5398,8 +5411,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
          SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
          SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(0, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(0, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5407,8 +5420,8 @@ SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
          SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
          SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(8, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(8, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
      }
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
      }
@@ -5512,7 +5525,8 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
        IsShiftRight ? Intrinsic::aarch64_neon_vsri : Intrinsic::aarch64_neon_vsli;
    SDValue ResultSLI =
        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
        IsShiftRight ? Intrinsic::aarch64_neon_vsri : Intrinsic::aarch64_neon_vsli;
    SDValue ResultSLI =
        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
-                  DAG.getConstant(Intrin, MVT::i32), X, Y, Shift.getOperand(1));
+                  DAG.getConstant(Intrin, DL, MVT::i32), X, Y,
+                  Shift.getOperand(1));
  
    DEBUG(dbgs() << "aarch64-lower: transformed: \n");
    DEBUG(N->dump(&DAG));
  
    DEBUG(dbgs() << "aarch64-lower: transformed: \n");
    DEBUG(N->dump(&DAG));
@@ -5562,8 +5576,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(0, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(0, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5571,8 +5585,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(8, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(8, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5580,8 +5594,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(16, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(16, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5589,8 +5603,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(24, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(24, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5598,8 +5612,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
          SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
          SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(0, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(0, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5607,8 +5621,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
          SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
          SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(8, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(8, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
      }
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
      }
@@ -5643,7 +5657,7 @@ static SDValue NormalizeBuildVector(SDValue Op,
      if (Lane.getOpcode() == ISD::Constant) {
        APInt LowBits(EltTy.getSizeInBits(),
                      cast<ConstantSDNode>(Lane)->getZExtValue());
      if (Lane.getOpcode() == ISD::Constant) {
        APInt LowBits(EltTy.getSizeInBits(),
                      cast<ConstantSDNode>(Lane)->getZExtValue());
-      Lane = DAG.getConstant(LowBits.getZExtValue(), MVT::i32);
+      Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
      }
      Ops.push_back(Lane);
    }
      }
      Ops.push_back(Lane);
    }
@@ -5681,13 +5695,13 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType10(CnstVal);
          if (VT.getSizeInBits() == 128) {
            SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::v2i64,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType10(CnstVal);
          if (VT.getSizeInBits() == 128) {
            SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::v2i64,
-                                    DAG.getConstant(CnstVal, MVT::i32));
+                                    DAG.getConstant(CnstVal, dl, MVT::i32));
            return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
          }
  
          // Support the V64 version via subregister insertion.
          SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::f64,
            return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
          }
  
          // Support the V64 version via subregister insertion.
          SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::f64,
-                                  DAG.getConstant(CnstVal, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5695,8 +5709,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(0, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(0, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5704,8 +5718,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(8, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(8, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5713,8 +5727,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(16, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(16, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5722,8 +5736,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(24, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(24, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5731,8 +5745,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
          SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
          SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(0, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(0, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5740,8 +5754,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
          SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
          SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(8, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(8, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5749,8 +5763,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(264, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(264, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5758,8 +5772,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(272, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(272, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5767,7 +5781,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType9(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
          SDValue Mov = DAG.getNode(AArch64ISD::MOVI, dl, MovTy,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType9(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
          SDValue Mov = DAG.getNode(AArch64ISD::MOVI, dl, MovTy,
-                                  DAG.getConstant(CnstVal, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5776,7 +5790,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType11(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32;
          SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MovTy,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType11(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32;
          SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MovTy,
-                                  DAG.getConstant(CnstVal, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5784,7 +5798,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
            VT.getSizeInBits() == 128) {
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType12(CnstVal);
          SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MVT::v2f64,
            VT.getSizeInBits() == 128) {
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType12(CnstVal);
          SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MVT::v2f64,
-                                  DAG.getConstant(CnstVal, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5794,8 +5808,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(0, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(0, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5803,8 +5817,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(8, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(8, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5812,8 +5826,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(16, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(16, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5821,8 +5835,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(24, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(24, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5830,8 +5844,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
          SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
          SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(0, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(0, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5839,8 +5853,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
          SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
          SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(8, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(8, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5848,8 +5862,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(264, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(264, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
  
@@ -5857,8 +5871,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
          CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
          MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
          SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
-                                  DAG.getConstant(CnstVal, MVT::i32),
-                                  DAG.getConstant(272, MVT::i32));
+                                  DAG.getConstant(CnstVal, dl, MVT::i32),
+                                  DAG.getConstant(272, dl, MVT::i32));
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
      }
          return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
        }
      }
@@ -5941,8 +5955,10 @@ FailedModImm:
  
      if (VT.getVectorElementType().isFloatingPoint()) {
        SmallVector<SDValue, 8> Ops;
  
      if (VT.getVectorElementType().isFloatingPoint()) {
        SmallVector<SDValue, 8> Ops;
-      MVT NewType =
-          (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+      EVT EltTy = VT.getVectorElementType();
+      assert ((EltTy == MVT::f16 || EltTy == MVT::f32 || EltTy == MVT::f64) &&
+              "Unsupported floating-point vector type");
+      MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
        for (unsigned i = 0; i < NumElts; ++i)
          Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
        EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
        for (unsigned i = 0; i < NumElts; ++i)
          Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
        EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
@@ -5962,7 +5978,7 @@ FailedModImm:
      // Now insert the non-constant lanes.
      for (unsigned i = 0; i < NumElts; ++i) {
        SDValue V = Op.getOperand(i);
      // Now insert the non-constant lanes.
      for (unsigned i = 0; i < NumElts; ++i) {
        SDValue V = Op.getOperand(i);
-      SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
+      SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
        if (!isa<ConstantSDNode>(V) && !isa<ConstantFPSDNode>(V)) {
          // Note that type legalization likely mucked about with the VT of the
          // source operand, so we may have to convert it here before inserting.
        if (!isa<ConstantSDNode>(V) && !isa<ConstantFPSDNode>(V)) {
          // Note that type legalization likely mucked about with the VT of the
          // source operand, so we may have to convert it here before inserting.
@@ -6004,7 +6020,7 @@ FailedModImm:
        unsigned SubIdx = ElemSize == 32 ? AArch64::ssub : AArch64::dsub;
        MachineSDNode *N =
            DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0,
        unsigned SubIdx = ElemSize == 32 ? AArch64::ssub : AArch64::dsub;
        MachineSDNode *N =
            DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0,
-                             DAG.getTargetConstant(SubIdx, MVT::i32));
+                             DAG.getTargetConstant(SubIdx, dl, MVT::i32));
        Vec = SDValue(N, 0);
        ++i;
      }
        Vec = SDValue(N, 0);
        ++i;
      }
@@ -6012,7 +6028,7 @@ FailedModImm:
        SDValue V = Op.getOperand(i);
        if (V.getOpcode() == ISD::UNDEF)
          continue;
        SDValue V = Op.getOperand(i);
        if (V.getOpcode() == ISD::UNDEF)
          continue;
-      SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
+      SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
      }
      return Vec;
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
      }
      return Vec;
@@ -6235,10 +6251,11 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
  
    case ISD::SHL:
      if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
  
    case ISD::SHL:
      if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
-      return DAG.getNode(AArch64ISD::VSHL, SDLoc(Op), VT, Op.getOperand(0),
-                         DAG.getConstant(Cnt, MVT::i32));
+      return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
+                         DAG.getConstant(Cnt, DL, MVT::i32));
      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
-                       DAG.getConstant(Intrinsic::aarch64_neon_ushl, MVT::i32),
+                       DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
+                                       MVT::i32),
                         Op.getOperand(0), Op.getOperand(1));
    case ISD::SRA:
    case ISD::SRL:
                         Op.getOperand(0), Op.getOperand(1));
    case ISD::SRA:
    case ISD::SRL:
@@ -6247,8 +6264,8 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
          Cnt < EltSize) {
        unsigned Opc =
            (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
          Cnt < EltSize) {
        unsigned Opc =
            (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
-      return DAG.getNode(Opc, SDLoc(Op), VT, Op.getOperand(0),
-                         DAG.getConstant(Cnt, MVT::i32));
+      return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
+                         DAG.getConstant(Cnt, DL, MVT::i32));
      }
  
      // Right shift register.  Note, there is not a shift right register
      }
  
      // Right shift register.  Note, there is not a shift right register
@@ -6260,7 +6277,8 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
      SDValue NegShift = DAG.getNode(AArch64ISD::NEG, DL, VT, Op.getOperand(1));
      SDValue NegShiftLeft =
          DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
      SDValue NegShift = DAG.getNode(AArch64ISD::NEG, DL, VT, Op.getOperand(1));
      SDValue NegShiftLeft =
          DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
-                    DAG.getConstant(Opc, MVT::i32), Op.getOperand(0), NegShift);
+                    DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
+                    NegShift);
      return NegShiftLeft;
    }
  
      return NegShiftLeft;
    }
  
@@ -6271,6 +6289,8 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
                                      AArch64CC::CondCode CC, bool NoNans, EVT VT,
                                      SDLoc dl, SelectionDAG &DAG) {
    EVT SrcVT = LHS.getValueType();
                                      AArch64CC::CondCode CC, bool NoNans, EVT VT,
                                      SDLoc dl, SelectionDAG &DAG) {
    EVT SrcVT = LHS.getValueType();
+  assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
+         "function only supposed to emit natural comparisons");
  
    BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
    APInt CnstBits(VT.getSizeInBits(), 0);
  
    BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
    APInt CnstBits(VT.getSizeInBits(), 0);
@@ -6365,13 +6385,15 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);
+  EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
    SDLoc dl(Op);
  
    if (LHS.getValueType().getVectorElementType().isInteger()) {
      assert(LHS.getValueType() == RHS.getValueType());
      AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
    SDLoc dl(Op);
  
    if (LHS.getValueType().getVectorElementType().isInteger()) {
      assert(LHS.getValueType() == RHS.getValueType());
      AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
-    return EmitVectorComparison(LHS, RHS, AArch64CC, false, Op.getValueType(),
-                                dl, DAG);
+    SDValue Cmp =
+        EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
+    return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
    }
  
    assert(LHS.getValueType().getVectorElementType() == MVT::f32 ||
    }
  
    assert(LHS.getValueType().getVectorElementType() == MVT::f32 ||
@@ -6385,19 +6407,21 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
  
    bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
    SDValue Cmp =
  
    bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
    SDValue Cmp =
-      EmitVectorComparison(LHS, RHS, CC1, NoNaNs, Op.getValueType(), dl, DAG);
+      EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
    if (!Cmp.getNode())
      return SDValue();
  
    if (CC2 != AArch64CC::AL) {
      SDValue Cmp2 =
    if (!Cmp.getNode())
      return SDValue();
  
    if (CC2 != AArch64CC::AL) {
      SDValue Cmp2 =
-        EmitVectorComparison(LHS, RHS, CC2, NoNaNs, Op.getValueType(), dl, DAG);
+        EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
      if (!Cmp2.getNode())
        return SDValue();
  
      if (!Cmp2.getNode())
        return SDValue();
  
-    Cmp = DAG.getNode(ISD::OR, dl, Cmp.getValueType(), Cmp, Cmp2);
+    Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
    }
  
    }
  
+  Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
+
    if (ShouldInvert)
      return Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
  
    if (ShouldInvert)
      return Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
  
@@ -6535,6 +6559,35 @@ bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
    return NumBits1 > NumBits2;
  }
  
    return NumBits1 > NumBits2;
  }
  
+/// Check if it is profitable to hoist instruction I from then/else into if.
+/// Not profitable if I is an FMul whose only user is an FAdd/FSub that can
+/// be fused with it into an FMA, because we prefer FMSUB/FMADD.
+bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
+  if (I->getOpcode() != Instruction::FMul)
+    return true;
+
+  if (I->getNumUses() != 1)
+    return true;
+
+  Instruction *User = I->user_back();
+
+  if (User && // NOTE(review): User is dereferenced unconditionally below.
+      !(User->getOpcode() == Instruction::FSub ||
+        User->getOpcode() == Instruction::FAdd))
+    return true;
+
+  const TargetOptions &Options = getTargetMachine().Options;
+  const DataLayout &DL = I->getModule()->getDataLayout();
+  EVT VT = getValueType(DL, User->getOperand(0)->getType());
+
+  if (isFMAFasterThanFMulAndFAdd(VT) &&
+      isOperationLegalOrCustom(ISD::FMA, VT) &&
+      (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath))
+    return false;
+
+  return true;
+}
+
  // All 32-bit GPR operations implicitly zero the high-half of the corresponding
  // 64-bit GPR.
  bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
  // All 32-bit GPR operations implicitly zero the high-half of the corresponding
  // 64-bit GPR.
  bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
@@ -6567,6 +6620,59 @@ bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
            VT1.getSizeInBits() <= 32);
  }
  
            VT1.getSizeInBits() <= 32);
  }
  
+bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
+  if (isa<FPExtInst>(Ext))
+    return false;
+
+  // Vector types are not free.
+  if (Ext->getType()->isVectorTy())
+    return false;
+
+  for (const Use &U : Ext->uses()) {
+    // The extension is free if we can fold it with a left shift in an
+    // addressing mode or an arithmetic operation: add, sub, and cmp.
+
+    // Is there a shift?
+    const Instruction *Instr = cast<Instruction>(U.getUser());
+
+    // Is this a constant shift?
+    switch (Instr->getOpcode()) {
+    case Instruction::Shl:
+      if (!isa<ConstantInt>(Instr->getOperand(1)))
+        return false;
+      break;
+    case Instruction::GetElementPtr: {
+      gep_type_iterator GTI = gep_type_begin(Instr);
+      std::advance(GTI, U.getOperandNo());
+      Type *IdxTy = *GTI;
+      // This extension will end up with a shift because of the scaling factor.
+      // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
+      // Get the shift amount based on the scaling factor:
+      // log2(store size of IdxTy in bits) - log2(8).
+      uint64_t ShiftAmt =
+        countTrailingZeros(getDataLayout()->getTypeStoreSizeInBits(IdxTy)) - 3;
+      // Is the constant foldable in the shift of the addressing mode?
+      // I.e., shift amount is between 1 and 4 inclusive.
+      if (ShiftAmt == 0 || ShiftAmt > 4)
+        return false;
+      break;
+    }
+    case Instruction::Trunc:
+      // Check if this is a noop.
+      // trunc(sext ty1 to ty2) to ty1.
+      if (Instr->getType() == Ext->getOperand(0)->getType())
+        continue;
+    // FALL THROUGH.
+    default:
+      return false;
+    }
+
+    // At this point we can use the bfm family, so this extension is free
+    // for that use.
+  }
+  return true;
+}
+
  bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType,
                                            unsigned &RequiredAligment) const {
    if (!LoadedType->isIntegerTy() && !LoadedType->isFloatTy())
  bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType,
                                            unsigned &RequiredAligment) const {
    if (!LoadedType->isIntegerTy() && !LoadedType->isFloatTy())
@@ -6588,6 +6694,160 @@ bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
    return NumBits == 32 || NumBits == 64;
  }
  
    return NumBits == 32 || NumBits == 64;
  }
  
+/// \brief Lower an interleaved load into a ldN intrinsic.
+///
+/// E.g. Lower an interleaved load (Factor = 2):
+///        %wide.vec = load <8 x i32>, <8 x i32>* %ptr
+///        %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6>  ; Extract even elements
+///        %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7>  ; Extract odd elements
+///
+///      Into:
+///        %ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr)
+///        %vec0 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 0
+///        %vec1 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 1
+bool AArch64TargetLowering::lowerInterleavedLoad(
+    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
+    ArrayRef<unsigned> Indices, unsigned Factor) const {
+  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
+         "Invalid interleave factor");
+  assert(!Shuffles.empty() && "Empty shufflevector input");
+  assert(Shuffles.size() == Indices.size() &&
+         "Unmatched number of shufflevectors and indices");
+
+  const DataLayout *DL = getDataLayout();
+
+  VectorType *VecTy = Shuffles[0]->getType();
+  unsigned VecSize = DL->getTypeAllocSizeInBits(VecTy);
+
+  // Skip illegal vector types.
+  if (VecSize != 64 && VecSize != 128)
+    return false;
+
+  // A pointer vector cannot be the return type of the ldN intrinsics. Need to
+  // load integer vectors first and then convert to pointer vectors.
+  Type *EltTy = VecTy->getVectorElementType();
+  if (EltTy->isPointerTy())
+    VecTy = VectorType::get(DL->getIntPtrType(EltTy),
+                            VecTy->getVectorNumElements());
+
+  Type *PtrTy = VecTy->getPointerTo(LI->getPointerAddressSpace());
+  Type *Tys[2] = {VecTy, PtrTy};
+  static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
+                                            Intrinsic::aarch64_neon_ld3,
+                                            Intrinsic::aarch64_neon_ld4};
+  Function *LdNFunc =
+      Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
+
+  IRBuilder<> Builder(LI);
+  Value *Ptr = Builder.CreateBitCast(LI->getPointerOperand(), PtrTy);
+
+  CallInst *LdN = Builder.CreateCall(LdNFunc, Ptr, "ldN");
+
+  // Replace uses of each shufflevector with the corresponding vector loaded
+  // by ldN.
+  for (unsigned i = 0; i < Shuffles.size(); i++) {
+    ShuffleVectorInst *SVI = Shuffles[i];
+    unsigned Index = Indices[i];
+
+    Value *SubVec = Builder.CreateExtractValue(LdN, Index);
+
+    // Convert the integer vector to pointer vector if the element is pointer.
+    if (EltTy->isPointerTy())
+      SubVec = Builder.CreateIntToPtr(SubVec, SVI->getType());
+
+    SVI->replaceAllUsesWith(SubVec);
+  }
+
+  return true;
+}
+
+/// \brief Get a mask consisting of sequential integers starting from \p Start.
+///
+/// I.e. the i32 constant vector <Start, Start + 1, ..., Start + NumElts - 1>
+static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
+                                   unsigned NumElts) {
+  SmallVector<Constant *, 16> Mask;
+  for (unsigned i = 0; i < NumElts; i++)
+    Mask.push_back(Builder.getInt32(Start + i));
+
+  return ConstantVector::get(Mask);
+}
+
+/// \brief Lower an interleaved store into a stN intrinsic.
+///
+/// E.g. Lower an interleaved store (Factor = 3):
+///        %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
+///                                  <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
+///        store <12 x i32> %i.vec, <12 x i32>* %ptr
+///
+///      Into:
+///        %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> %v1, <0, 1, 2, 3>
+///        %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> %v1, <4, 5, 6, 7>
+///        %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> %v1, <8, 9, 10, 11>
+///        call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
+///
+/// Note that the new shufflevectors will be removed and we'll only generate one
+/// st3 instruction in CodeGen.
+bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
+                                                  ShuffleVectorInst *SVI,
+                                                  unsigned Factor) const {
+  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
+         "Invalid interleave factor");
+
+  VectorType *VecTy = SVI->getType();
+  assert(VecTy->getVectorNumElements() % Factor == 0 &&
+         "Invalid interleaved store");
+
+  unsigned NumSubElts = VecTy->getVectorNumElements() / Factor;
+  Type *EltTy = VecTy->getVectorElementType();
+  VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
+
+  const DataLayout *DL = getDataLayout();
+  unsigned SubVecSize = DL->getTypeAllocSizeInBits(SubVecTy);
+
+  // Skip illegal vector types.
+  if (SubVecSize != 64 && SubVecSize != 128)
+    return false;
+
+  Value *Op0 = SVI->getOperand(0);
+  Value *Op1 = SVI->getOperand(1);
+  IRBuilder<> Builder(SI);
+
+  // StN intrinsics don't support pointer vectors as arguments. Convert pointer
+  // vectors to integer vectors.
+  if (EltTy->isPointerTy()) {
+    Type *IntTy = DL->getIntPtrType(EltTy);
+    unsigned NumOpElts = // NOTE(review): dyn_cast result unchecked; cast<>?
+        dyn_cast<VectorType>(Op0->getType())->getVectorNumElements();
+
+    // Convert to the corresponding integer vector.
+    Type *IntVecTy = VectorType::get(IntTy, NumOpElts);
+    Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
+    Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
+
+    SubVecTy = VectorType::get(IntTy, NumSubElts);
+  }
+
+  Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace());
+  Type *Tys[2] = {SubVecTy, PtrTy};
+  static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2,
+                                             Intrinsic::aarch64_neon_st3,
+                                             Intrinsic::aarch64_neon_st4};
+  Function *StNFunc =
+      Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
+
+  SmallVector<Value *, 5> Ops;
+
+  // Split the shufflevector operands into sub vectors for the new stN call.
+  for (unsigned i = 0; i < Factor; i++)
+    Ops.push_back(Builder.CreateShuffleVector(
+        Op0, Op1, getSequentialMask(Builder, NumSubElts * i, NumSubElts)));
+
+  Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), PtrTy));
+  Builder.CreateCall(StNFunc, Ops);
+  return true;
+}
+
  static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
                         unsigned AlignCheck) {
    return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
  static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
                         unsigned AlignCheck) {
    return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
@@ -6605,13 +6865,22 @@ EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
    bool Fast;
    const Function *F = MF.getFunction();
    if (Subtarget->hasFPARMv8() && !IsMemset && Size >= 16 &&
    bool Fast;
    const Function *F = MF.getFunction();
    if (Subtarget->hasFPARMv8() && !IsMemset && Size >= 16 &&
-      !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
-                                       Attribute::NoImplicitFloat) &&
+      !F->hasFnAttribute(Attribute::NoImplicitFloat) &&
        (memOpAlign(SrcAlign, DstAlign, 16) ||
         (allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast)))
      return MVT::f128;
  
        (memOpAlign(SrcAlign, DstAlign, 16) ||
         (allowsMisalignedMemoryAccesses(MVT::f128, 0, 1, &Fast) && Fast)))
      return MVT::f128;
  
-  return Size >= 8 ? MVT::i64 : MVT::i32;
+  if (Size >= 8 &&
+      (memOpAlign(SrcAlign, DstAlign, 8) ||
+       (allowsMisalignedMemoryAccesses(MVT::i64, 0, 1, &Fast) && Fast)))
+    return MVT::i64;
+
+  if (Size >= 4 &&
+      (memOpAlign(SrcAlign, DstAlign, 4) ||
+       (allowsMisalignedMemoryAccesses(MVT::i32, 0, 1, &Fast) && Fast)))
+    return MVT::i32;
+
+  return MVT::Other;
  }
  
  // 12-bit optionally shifted immediates are legal for adds.
  }
  
  // 12-bit optionally shifted immediates are legal for adds.
@@ -6632,7 +6901,8 @@ bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
  /// isLegalAddressingMode - Return true if the addressing mode represented
  /// by AM is legal for this target, for a load/store of the specified type.
  bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM,
  /// isLegalAddressingMode - Return true if the addressing mode represented
  /// by AM is legal for this target, for a load/store of the specified type.
  bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM,
-                                                  Type *Ty) const {
+                                                  Type *Ty,
+                                                  unsigned AS) const {
    // AArch64 has five basic addressing modes:
    //  reg
    //  reg + 9-bit signed offset
    // AArch64 has five basic addressing modes:
    //  reg
    //  reg + 9-bit signed offset
@@ -6683,7 +6953,8 @@ bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM,
  }
  
  int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM,
  }
  
  int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM,
-                                                Type *Ty) const {
+                                                Type *Ty,
+                                                unsigned AS) const {
    // Scaling factors are not free at all.
    // Operands                     | Rt Latency
    // -------------------------------------------
    // Scaling factors are not free at all.
    // Operands                     | Rt Latency
    // -------------------------------------------
@@ -6691,7 +6962,7 @@ int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM,
    // -------------------------------------------
    // Rt, [Xn, Xm, lsl #imm]       | Rn: 4 Rm: 5
    // Rt, [Xn, Wm, <extend> #imm]  |
    // -------------------------------------------
    // Rt, [Xn, Xm, lsl #imm]       | Rn: 4 Rm: 5
    // Rt, [Xn, Wm, <extend> #imm]  |
-  if (isLegalAddressingMode(AM, Ty))
+  if (isLegalAddressingMode(AM, Ty, AS))
      // Scale represents reg2 * scale, thus account for 1 if
      // it is not equal to 0 or 1.
      return AM.Scale != 0 && AM.Scale != 1;
      // Scale represents reg2 * scale, thus account for 1 if
      // it is not equal to 0 or 1.
      return AM.Scale != 0 && AM.Scale != 1;
@@ -6762,7 +7033,7 @@ bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
    unsigned LZ = countLeadingZeros((uint64_t)Val);
    unsigned Shift = (63 - LZ) / 16;
    // MOVZ is free so return true for one or fewer MOVK.
    unsigned LZ = countLeadingZeros((uint64_t)Val);
    unsigned Shift = (63 - LZ) / 16;
    // MOVZ is free so return true for one or fewer MOVK.
-  return (Shift < 3) ? true : false;
+  return Shift < 3;
  }
  
  // Generate SUBS and CSEL for integer abs.
  }
  
  // Generate SUBS and CSEL for integer abs.
@@ -6780,14 +7051,14 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
        N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0))
      if (ConstantSDNode *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1)))
        if (Y1C->getAPIntValue() == VT.getSizeInBits() - 1) {
        N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0))
      if (ConstantSDNode *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1)))
        if (Y1C->getAPIntValue() == VT.getSizeInBits() - 1) {
-        SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
+        SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                                    N0.getOperand(0));
          // Generate SUBS & CSEL.
          SDValue Cmp =
              DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
                                    N0.getOperand(0));
          // Generate SUBS & CSEL.
          SDValue Cmp =
              DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
-                        N0.getOperand(0), DAG.getConstant(0, VT));
+                        N0.getOperand(0), DAG.getConstant(0, DL, VT));
          return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0.getOperand(0), Neg,
          return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0.getOperand(0), Neg,
-                           DAG.getConstant(AArch64CC::PL, MVT::i32),
+                           DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
                             SDValue(Cmp.getNode(), 1));
        }
    return SDValue();
                             SDValue(Cmp.getNode(), 1));
        }
    return SDValue();
@@ -6816,8 +7087,8 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
    SDLoc DL(N);
    SDValue N0 = N->getOperand(0);
    unsigned Lg2 = Divisor.countTrailingZeros();
    SDLoc DL(N);
    SDValue N0 = N->getOperand(0);
    unsigned Lg2 = Divisor.countTrailingZeros();
-  SDValue Zero = DAG.getConstant(0, VT);
-  SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, VT);
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
  
    // Add (N0 < 0) ? Pow2 - 1 : 0;
    SDValue CCVal;
  
    // Add (N0 < 0) ? Pow2 - 1 : 0;
    SDValue CCVal;
@@ -6833,7 +7104,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
  
    // Divide by pow2.
    SDValue SRA =
  
    // Divide by pow2.
    SDValue SRA =
-      DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, MVT::i64));
+      DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));
  
    // If we're dividing by a positive value, we're done.  Otherwise, we must
    // negate the result.
  
    // If we're dividing by a positive value, we're done.  Otherwise, we must
    // negate the result.
@@ -6842,7 +7113,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
  
    if (Created)
      Created->push_back(SRA.getNode());
  
    if (Created)
      Created->push_back(SRA.getNode());
-  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), SRA);
+  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
  }
  
  static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
  }
  
  static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
@@ -6859,45 +7130,46 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
      APInt Value = C->getAPIntValue();
      EVT VT = N->getValueType(0);
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
      APInt Value = C->getAPIntValue();
      EVT VT = N->getValueType(0);
+    SDLoc DL(N);
      if (Value.isNonNegative()) {
        // (mul x, 2^N + 1) => (add (shl x, N), x)
        APInt VM1 = Value - 1;
        if (VM1.isPowerOf2()) {
          SDValue ShiftedVal =
      if (Value.isNonNegative()) {
        // (mul x, 2^N + 1) => (add (shl x, N), x)
        APInt VM1 = Value - 1;
        if (VM1.isPowerOf2()) {
          SDValue ShiftedVal =
-            DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
-                        DAG.getConstant(VM1.logBase2(), MVT::i64));
-        return DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal,
+            DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                        DAG.getConstant(VM1.logBase2(), DL, MVT::i64));
+        return DAG.getNode(ISD::ADD, DL, VT, ShiftedVal,
                             N->getOperand(0));
        }
        // (mul x, 2^N - 1) => (sub (shl x, N), x)
        APInt VP1 = Value + 1;
        if (VP1.isPowerOf2()) {
          SDValue ShiftedVal =
                             N->getOperand(0));
        }
        // (mul x, 2^N - 1) => (sub (shl x, N), x)
        APInt VP1 = Value + 1;
        if (VP1.isPowerOf2()) {
          SDValue ShiftedVal =
-            DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
-                        DAG.getConstant(VP1.logBase2(), MVT::i64));
-        return DAG.getNode(ISD::SUB, SDLoc(N), VT, ShiftedVal,
+            DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                        DAG.getConstant(VP1.logBase2(), DL, MVT::i64));
+        return DAG.getNode(ISD::SUB, DL, VT, ShiftedVal,
                             N->getOperand(0));
        }
      } else {
                             N->getOperand(0));
        }
      } else {
-      // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
-      APInt VNM1 = -Value - 1;
-      if (VNM1.isPowerOf2()) {
-        SDValue ShiftedVal =
-            DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
-                        DAG.getConstant(VNM1.logBase2(), MVT::i64));
-        SDValue Add =
-            DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
-        return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), Add);
-      }
        // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
        APInt VNP1 = -Value + 1;
        if (VNP1.isPowerOf2()) {
          SDValue ShiftedVal =
        // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
        APInt VNP1 = -Value + 1;
        if (VNP1.isPowerOf2()) {
          SDValue ShiftedVal =
-            DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
-                        DAG.getConstant(VNP1.logBase2(), MVT::i64));
-        return DAG.getNode(ISD::SUB, SDLoc(N), VT, N->getOperand(0),
+            DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                        DAG.getConstant(VNP1.logBase2(), DL, MVT::i64));
+        return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0),
                             ShiftedVal);
        }
                             ShiftedVal);
        }
+      // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
+      APInt VNM1 = -Value - 1;
+      if (VNM1.isPowerOf2()) {
+        SDValue ShiftedVal =
+            DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+                        DAG.getConstant(VNM1.logBase2(), DL, MVT::i64));
+        SDValue Add =
+            DAG.getNode(ISD::ADD, DL, VT, ShiftedVal, N->getOperand(0));
+        return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Add);
+      }
      }
    }
    return SDValue();
      }
    }
    return SDValue();
@@ -6949,7 +7221,8 @@ static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
    return SDValue();
  }
  
    return SDValue();
  }
  
-static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
+                                     const AArch64Subtarget *Subtarget) {
    // First try to optimize away the conversion when it's conditionally from
    // a constant. Vectors only.
    SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
    // First try to optimize away the conversion when it's conditionally from
    // a constant. Vectors only.
    SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG);
@@ -6968,7 +7241,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) {
    // conversion, use an fp load and an AdvSIMD scalar {S|U}CVTF instead.
    // This eliminates an "integer-to-vector-move" UOP and improves throughput.
    SDValue N0 = N->getOperand(0);
    // conversion, use an fp load and an AdvSIMD scalar {S|U}CVTF instead.
    // This eliminates an "integer-to-vector-move" UOP and improves throughput.
    SDValue N0 = N->getOperand(0);
-  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+  if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
        // Do not change the width of a volatile load.
        !cast<LoadSDNode>(N0)->isVolatile()) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
        // Do not change the width of a volatile load.
        !cast<LoadSDNode>(N0)->isVolatile()) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -7050,7 +7323,7 @@ static SDValue tryCombineToEXTR(SDNode *N,
    }
  
    return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS,
    }
  
    return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS,
-                     DAG.getConstant(ShiftRHS, MVT::i64));
+                     DAG.getConstant(ShiftRHS, DL, MVT::i64));
  }
  
  static SDValue tryCombineToBSL(SDNode *N,
  }
  
  static SDValue tryCombineToBSL(SDNode *N,
@@ -7178,10 +7451,10 @@ static SDValue performBitcastCombine(SDNode *N,
    SDLoc dl(N);
    unsigned NumElements = VT.getVectorNumElements();
    if (idx) {
    SDLoc dl(N);
    unsigned NumElements = VT.getVectorNumElements();
    if (idx) {
-    SDValue HalfIdx = DAG.getConstant(NumElements, MVT::i64);
+    SDValue HalfIdx = DAG.getConstant(NumElements, dl, MVT::i64);
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Source, HalfIdx);
    } else {
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Source, HalfIdx);
    } else {
-    SDValue SubReg = DAG.getTargetConstant(AArch64::dsub, MVT::i32);
+    SDValue SubReg = DAG.getTargetConstant(AArch64::dsub, dl, MVT::i32);
      return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, VT,
                                        Source, SubReg),
                     0);
      return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, VT,
                                        Source, SubReg),
                     0);
@@ -7191,22 +7464,55 @@ static SDValue performBitcastCombine(SDNode *N,
  static SDValue performConcatVectorsCombine(SDNode *N,
                                             TargetLowering::DAGCombinerInfo &DCI,
                                             SelectionDAG &DAG) {
  static SDValue performConcatVectorsCombine(SDNode *N,
                                             TargetLowering::DAGCombinerInfo &DCI,
                                             SelectionDAG &DAG) {
+  SDLoc dl(N);
+  EVT VT = N->getValueType(0);
+  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+
+  // Optimize concat_vectors of truncated vectors, where the intermediate
+  // type is illegal, to avoid said illegality,  e.g.,
+  //   (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
+  //                          (v2i16 (truncate (v2i64)))))
+  // ->
+  //   (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))),
+  //                                    (v4i32 (bitcast (v2i64))),
+  //                                    <0, 2, 4, 6>)))
+  // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
+  // on both input and result type, so we might generate worse code.
+  // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
+  if (N->getNumOperands() == 2 &&
+      N0->getOpcode() == ISD::TRUNCATE &&
+      N1->getOpcode() == ISD::TRUNCATE) {
+    SDValue N00 = N0->getOperand(0);
+    SDValue N10 = N1->getOperand(0);
+    EVT N00VT = N00.getValueType();
+
+    if (N00VT == N10.getValueType() &&
+        (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
+        N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) {
+      MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
+      SmallVector<int, 8> Mask(MidVT.getVectorNumElements());
+      for (size_t i = 0; i < Mask.size(); ++i)
+        Mask[i] = i * 2;
+      return DAG.getNode(ISD::TRUNCATE, dl, VT,
+                         DAG.getVectorShuffle(
+                             MidVT, dl,
+                             DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
+                             DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
+    }
+  }
+
    // Wait 'til after everything is legalized to try this. That way we have
    // legal vector types and such.
    if (DCI.isBeforeLegalizeOps())
      return SDValue();
  
    // Wait 'til after everything is legalized to try this. That way we have
    // legal vector types and such.
    if (DCI.isBeforeLegalizeOps())
      return SDValue();
  
-  SDLoc dl(N);
-  EVT VT = N->getValueType(0);
-
    // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
    // splat. The indexed instructions are going to be expecting a DUPLANE64, so
    // canonicalise to that.
    // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
    // splat. The indexed instructions are going to be expecting a DUPLANE64, so
    // canonicalise to that.
-  if (N->getOperand(0) == N->getOperand(1) && VT.getVectorNumElements() == 2) {
+  if (N0 == N1 && VT.getVectorNumElements() == 2) {
      assert(VT.getVectorElementType().getSizeInBits() == 64);
      assert(VT.getVectorElementType().getSizeInBits() == 64);
-    return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT,
-                       WidenVector(N->getOperand(0), DAG),
-                       DAG.getConstant(0, MVT::i64));
+    return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
+                       DAG.getConstant(0, dl, MVT::i64));
    }
  
    // Canonicalise concat_vectors so that the right-hand vector has as few
    }
  
    // Canonicalise concat_vectors so that the right-hand vector has as few
@@ -7218,10 +7524,9 @@ static SDValue performConcatVectorsCombine(SDNode *N,
    // becomes
    //    (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
  
    // becomes
    //    (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
  
-  SDValue Op1 = N->getOperand(1);
-  if (Op1->getOpcode() != ISD::BITCAST)
+  if (N1->getOpcode() != ISD::BITCAST)
      return SDValue();
      return SDValue();
-  SDValue RHS = Op1->getOperand(0);
+  SDValue RHS = N1->getOperand(0);
    MVT RHSTy = RHS.getValueType().getSimpleVT();
    // If the RHS is not a vector, this is not the pattern we're looking for.
    if (!RHSTy.isVector())
    MVT RHSTy = RHS.getValueType().getSimpleVT();
    // If the RHS is not a vector, this is not the pattern we're looking for.
    if (!RHSTy.isVector())
@@ -7231,10 +7536,10 @@ static SDValue performConcatVectorsCombine(SDNode *N,
  
    MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
                                    RHSTy.getVectorNumElements() * 2);
  
    MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
                                    RHSTy.getVectorNumElements() * 2);
-  return DAG.getNode(
-      ISD::BITCAST, dl, VT,
-      DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
-                  DAG.getNode(ISD::BITCAST, dl, RHSTy, N->getOperand(0)), RHS));
+  return DAG.getNode(ISD::BITCAST, dl, VT,
+                     DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
+                                 DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
+                                 RHS));
  }
  
  static SDValue tryCombineFixedPointConvert(SDNode *N,
  }
  
  static SDValue tryCombineFixedPointConvert(SDNode *N,
@@ -7297,21 +7602,26 @@ static SDValue tryCombineFixedPointConvert(SDNode *N,
  //
  // This routine does the actual conversion of such DUPs, once outer routines
  // have determined that everything else is in order.
  //
  // This routine does the actual conversion of such DUPs, once outer routines
  // have determined that everything else is in order.
+// It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold
+// similarly here.
  static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
  static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
-  // We can handle most types of duplicate, but the lane ones have an extra
-  // operand saying *which* lane, so we need to know.
-  bool IsDUPLANE;
    switch (N.getOpcode()) {
    case AArch64ISD::DUP:
    switch (N.getOpcode()) {
    case AArch64ISD::DUP:
-    IsDUPLANE = false;
-    break;
    case AArch64ISD::DUPLANE8:
    case AArch64ISD::DUPLANE16:
    case AArch64ISD::DUPLANE32:
    case AArch64ISD::DUPLANE64:
    case AArch64ISD::DUPLANE8:
    case AArch64ISD::DUPLANE16:
    case AArch64ISD::DUPLANE32:
    case AArch64ISD::DUPLANE64:
-    IsDUPLANE = true;
+  case AArch64ISD::MOVI:
+  case AArch64ISD::MOVIshift:
+  case AArch64ISD::MOVIedit:
+  case AArch64ISD::MOVImsl:
+  case AArch64ISD::MVNIshift:
+  case AArch64ISD::MVNImsl:
      break;
    default:
      break;
    default:
+    // FMOV could be supported, but isn't very useful, as it would only occur
+    // if you passed a bitcast floating-point immediate to an eligible long
+    // integer op (addl, smull, ...).
      return SDValue();
    }
  
      return SDValue();
    }
  
@@ -7321,17 +7631,12 @@ static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
  
    MVT ElementTy = NarrowTy.getVectorElementType();
    unsigned NumElems = NarrowTy.getVectorNumElements();
  
    MVT ElementTy = NarrowTy.getVectorElementType();
    unsigned NumElems = NarrowTy.getVectorNumElements();
-  MVT NewDUPVT = MVT::getVectorVT(ElementTy, NumElems * 2);
-
-  SDValue NewDUP;
-  if (IsDUPLANE)
-    NewDUP = DAG.getNode(N.getOpcode(), SDLoc(N), NewDUPVT, N.getOperand(0),
-                         N.getOperand(1));
-  else
-    NewDUP = DAG.getNode(AArch64ISD::DUP, SDLoc(N), NewDUPVT, N.getOperand(0));
+  MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2);
  
  
-  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N.getNode()), NarrowTy,
-                     NewDUP, DAG.getConstant(NumElems, MVT::i64));
+  SDLoc dl(N);
+  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy,
+                     DAG.getNode(N->getOpcode(), dl, NewVT, N->ops()),
+                     DAG.getConstant(NumElems, dl, MVT::i64));
  }
  
  static bool isEssentiallyExtractSubvector(SDValue N) {
  }
  
  static bool isEssentiallyExtractSubvector(SDValue N) {
@@ -7456,7 +7761,8 @@ static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
    SDLoc dl(Op);
    if (InfoAndKind.IsAArch64) {
      CCVal = DAG.getConstant(
    SDLoc dl(Op);
    if (InfoAndKind.IsAArch64) {
      CCVal = DAG.getConstant(
-        AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), MVT::i32);
+        AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl,
+        MVT::i32);
      Cmp = *InfoAndKind.Info.AArch64.Cmp;
    } else
      Cmp = getAArch64Cmp(*InfoAndKind.Info.Generic.Opnd0,
      Cmp = *InfoAndKind.Info.AArch64.Cmp;
    } else
      Cmp = getAArch64Cmp(*InfoAndKind.Info.Generic.Opnd0,
@@ -7465,7 +7771,7 @@ static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
                        CCVal, DAG, dl);
  
    EVT VT = Op->getValueType(0);
                        CCVal, DAG, dl);
  
    EVT VT = Op->getValueType(0);
-  LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, VT));
+  LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
    return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
  }
  
    return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
  }
  
@@ -7605,12 +7911,15 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
      break;
    }
  
      break;
    }
  
-  if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits)
-    return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1),
-                       DAG.getConstant(-ShiftAmount, MVT::i32));
-  else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits)
-    return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1),
-                       DAG.getConstant(ShiftAmount, MVT::i32));
+  if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
+    SDLoc dl(N);
+    return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
+                       DAG.getConstant(-ShiftAmount, dl, MVT::i32));
+  } else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
+    SDLoc dl(N);
+    return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
+                       DAG.getConstant(ShiftAmount, dl, MVT::i32));
+  }
  
    return SDValue();
  }
  
    return SDValue();
  }
@@ -7631,6 +7940,16 @@ static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
                       N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
  }
  
                       N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
  }
  
+static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
+                                           SelectionDAG &DAG) {
+  SDLoc dl(N);
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
+                     DAG.getNode(Opc, dl,
+                                 N->getOperand(1).getSimpleValueType(),
+                                 N->getOperand(1)),
+                     DAG.getConstant(0, dl, MVT::i64));
+}
+
  static SDValue performIntrinsicCombine(SDNode *N,
                                         TargetLowering::DAGCombinerInfo &DCI,
                                         const AArch64Subtarget *Subtarget) {
  static SDValue performIntrinsicCombine(SDNode *N,
                                         TargetLowering::DAGCombinerInfo &DCI,
                                         const AArch64Subtarget *Subtarget) {
@@ -7643,6 +7962,18 @@ static SDValue performIntrinsicCombine(SDNode *N,
    case Intrinsic::aarch64_neon_vcvtfxu2fp:
      return tryCombineFixedPointConvert(N, DCI, DAG);
      break;
    case Intrinsic::aarch64_neon_vcvtfxu2fp:
      return tryCombineFixedPointConvert(N, DCI, DAG);
      break;
+  case Intrinsic::aarch64_neon_saddv:
+    return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
+  case Intrinsic::aarch64_neon_uaddv:
+    return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG);
+  case Intrinsic::aarch64_neon_sminv:
+    return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG);
+  case Intrinsic::aarch64_neon_uminv:
+    return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG);
+  case Intrinsic::aarch64_neon_smaxv:
+    return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
+  case Intrinsic::aarch64_neon_umaxv:
+    return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
    case Intrinsic::aarch64_neon_fmax:
      return DAG.getNode(AArch64ISD::FMAX, SDLoc(N), N->getValueType(0),
                         N->getOperand(1), N->getOperand(2));
    case Intrinsic::aarch64_neon_fmax:
      return DAG.getNode(AArch64ISD::FMAX, SDLoc(N), N->getValueType(0),
                         N->getOperand(1), N->getOperand(2));
@@ -7757,9 +8088,9 @@ static SDValue performExtendCombine(SDNode *N,
    EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
                                 LoVT.getVectorNumElements());
    Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
    EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
                                 LoVT.getVectorNumElements());
    Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
-                   DAG.getIntPtrConstant(0));
+                   DAG.getConstant(0, DL, MVT::i64));
    Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
    Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
-                   DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+                   DAG.getConstant(InNVT.getVectorNumElements(), DL, MVT::i64));
    Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
    Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
  
    Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
    Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
  
@@ -7819,7 +8150,7 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) {
    unsigned Offset = EltOffset;
    while (--NumVecElts) {
      SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
    unsigned Offset = EltOffset;
    while (--NumVecElts) {
      SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
-                                    DAG.getConstant(Offset, MVT::i64));
+                                    DAG.getConstant(Offset, DL, MVT::i64));
      NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
                            St->getPointerInfo(), St->isVolatile(),
                            St->isNonTemporal(), Alignment);
      NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
                            St->getPointerInfo(), St->isVolatile(),
                            St->isNonTemporal(), Alignment);
@@ -7840,14 +8171,13 @@ static SDValue performSTORECombine(SDNode *N,
      return SDValue();
  
    // Cyclone has bad performance on unaligned 16B stores when crossing line and
      return SDValue();
  
    // Cyclone has bad performance on unaligned 16B stores when crossing line and
-  // page boundries. We want to split such stores.
+  // page boundaries. We want to split such stores.
    if (!Subtarget->isCyclone())
      return SDValue();
  
    // Don't split at Oz.
    MachineFunction &MF = DAG.getMachineFunction();
    if (!Subtarget->isCyclone())
      return SDValue();
  
    // Don't split at Oz.
    MachineFunction &MF = DAG.getMachineFunction();
-  bool IsMinSize = MF.getFunction()->getAttributes().hasAttribute(
-      AttributeSet::FunctionIndex, Attribute::MinSize);
+  bool IsMinSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize);
    if (IsMinSize)
      return SDValue();
  
    if (IsMinSize)
      return SDValue();
  
@@ -7881,15 +8211,15 @@ static SDValue performSTORECombine(SDNode *N,
    EVT HalfVT =
        EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElts);
    SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
    EVT HalfVT =
        EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElts);
    SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
-                                   DAG.getIntPtrConstant(0));
+                                   DAG.getConstant(0, DL, MVT::i64));
    SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
    SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
-                                   DAG.getIntPtrConstant(NumElts));
+                                   DAG.getConstant(NumElts, DL, MVT::i64));
    SDValue BasePtr = S->getBasePtr();
    SDValue NewST1 =
        DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
                     S->isVolatile(), S->isNonTemporal(), S->getAlignment());
    SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
    SDValue BasePtr = S->getBasePtr();
    SDValue NewST1 =
        DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
                     S->isVolatile(), S->isNonTemporal(), S->getAlignment());
    SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
-                                  DAG.getConstant(8, MVT::i64));
+                                  DAG.getConstant(8, DL, MVT::i64));
    return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
                        S->getPointerInfo(), S->isVolatile(), S->isNonTemporal(),
                        S->getAlignment());
    return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
                        S->getPointerInfo(), S->isVolatile(), S->isNonTemporal(),
                        S->getAlignment());
@@ -7957,6 +8287,13 @@ static SDValue performPostLD1Combine(SDNode *N,
        Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
      }
  
        Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
      }
  
+    // Finally, check that the vector doesn't depend on the load.
+    // Again, this would create a cycle.
+    // The load depending on the vector is fine, as that's the case for the
+    // LD1*post we'll eventually generate anyway.
+    if (LoadSDN->isPredecessorOf(Vector.getNode()))
+      continue;
+
      SmallVector<SDValue, 8> Ops;
      Ops.push_back(LD->getOperand(0));  // Chain
      if (IsLaneOp) {
      SmallVector<SDValue, 8> Ops;
      Ops.push_back(LD->getOperand(0));  // Chain
      if (IsLaneOp) {
@@ -7974,7 +8311,7 @@ static SDValue performPostLD1Combine(SDNode *N,
                                             LoadSDN->getMemOperand());
  
      // Update the uses.
                                             LoadSDN->getMemOperand());
  
      // Update the uses.
-    std::vector<SDValue> NewResults;
+    SmallVector<SDValue, 2> NewResults;
      NewResults.push_back(SDValue(LD, 0));             // The result of load
      NewResults.push_back(SDValue(UpdN.getNode(), 2)); // Chain
      DCI.CombineTo(LD, NewResults);
      NewResults.push_back(SDValue(LD, 0));             // The result of load
      NewResults.push_back(SDValue(UpdN.getNode(), 2)); // Chain
      DCI.CombineTo(LD, NewResults);
@@ -8468,17 +8805,31 @@ static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
  /// the compare-mask instructions rather than going via NZCV, even if LHS and
  /// RHS are really scalar. This replaces any scalar setcc in the above pattern
  /// with a vector one followed by a DUP shuffle on the result.
  /// the compare-mask instructions rather than going via NZCV, even if LHS and
  /// RHS are really scalar. This replaces any scalar setcc in the above pattern
  /// with a vector one followed by a DUP shuffle on the result.
-static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performSelectCombine(SDNode *N,
+                                    TargetLowering::DAGCombinerInfo &DCI) {
+  SelectionDAG &DAG = DCI.DAG;
    SDValue N0 = N->getOperand(0);
    EVT ResVT = N->getValueType(0);
  
    SDValue N0 = N->getOperand(0);
    EVT ResVT = N->getValueType(0);
  
-  if (N0.getOpcode() != ISD::SETCC || N0.getValueType() != MVT::i1)
+  if (N0.getOpcode() != ISD::SETCC)
      return SDValue();
  
      return SDValue();
  
+  // Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
+  // scalar SetCCResultType. We also don't expect vectors, because we assume
+  // that selects fed by vector SETCCs are canonicalized to VSELECT.
+  assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) &&
+         "Scalar-SETCC feeding SELECT has unexpected result type!");
+
    // If NumMaskElts == 0, the comparison is larger than select result. The
    // largest real NEON comparison is 64-bits per lane, which means the result is
    // at most 32-bits and an illegal vector. Just bail out for now.
    EVT SrcVT = N0.getOperand(0).getValueType();
    // If NumMaskElts == 0, the comparison is larger than select result. The
    // largest real NEON comparison is 64-bits per lane, which means the result is
    // at most 32-bits and an illegal vector. Just bail out for now.
    EVT SrcVT = N0.getOperand(0).getValueType();
+
+  // Don't try to do this optimization when the setcc itself has i1 operands.
+  // There are no legal vectors of i1, so this would be pointless.
+  if (SrcVT == MVT::i1)
+    return SDValue();
+
    int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
    if (!ResVT.isVector() || NumMaskElts == 0)
      return SDValue();
    int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
    if (!ResVT.isVector() || NumMaskElts == 0)
      return SDValue();
@@ -8486,6 +8837,16 @@ static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
    SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
    EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
  
    SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
    EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
  
+  // Also bail out if the vector CCVT isn't the same size as ResVT.
+  // This can happen if the SETCC operand size doesn't divide the ResVT size
+  // (e.g., f64 vs v3f32).
+  if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
+    return SDValue();
+
+  // Make sure we didn't create illegal types, if we're not supposed to.
+  assert(DCI.isBeforeLegalize() ||
+         DAG.getTargetLoweringInfo().isTypeLegal(SrcVT));
+
    // First perform a vector comparison, where lane 0 is the one we're interested
    // in.
    SDLoc DL(N0);
    // First perform a vector comparison, where lane 0 is the one we're interested
    // in.
    SDLoc DL(N0);
@@ -8504,6 +8865,83 @@ static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
    return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
  }
  
    return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
  }
  
+/// performSelectCCCombine - Target-specific DAG combine turning min/max-shaped
+/// ISD::SELECT_CC nodes into AArch64ISD::FMIN/FMAX; else returns SDValue().
+static SDValue performSelectCCCombine(SDNode *N, SelectionDAG &DAG) {
+  // Try to use FMIN/FMAX instructions for FP selects like "x < y ? x : y".
+  // Unless the NoNaNsFPMath option is set, be careful about NaNs:
+  // FMAX/FMIN return NaN if either operand is a NaN;
+  // only do the transformation when it matches that behavior.
+
+  SDValue CondLHS = N->getOperand(0); // Left operand of the comparison.
+  SDValue CondRHS = N->getOperand(1); // Right operand of the comparison.
+  SDValue LHS = N->getOperand(2);     // Value selected when CC holds.
+  SDValue RHS = N->getOperand(3);     // Value selected otherwise.
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+  unsigned Opcode; // Set by every non-default case of the switch below.
+  bool IsReversed; // True when the selected values swap the compared ones.
+  if (selectCCOpsAreFMaxCompatible(CondLHS, LHS) &&
+      selectCCOpsAreFMaxCompatible(CondRHS, RHS)) {
+    IsReversed = false; // x CC y ? x : y
+  } else if (selectCCOpsAreFMaxCompatible(CondRHS, LHS) &&
+             selectCCOpsAreFMaxCompatible(CondLHS, RHS)) {
+    IsReversed = true ; // x CC y ? y : x
+  } else {
+    return SDValue(); // Selected values are unrelated to the compared ones.
+  }
+
+  bool IsUnordered = false, IsOrEqual;
+  switch (CC) {
+  default:
+    return SDValue(); // Only less-than / greater-than style codes qualify.
+  case ISD::SETULT:
+  case ISD::SETULE:
+    IsUnordered = true; // Intentional fall through to the shared handling.
+  case ISD::SETOLT:
+  case ISD::SETOLE:
+  case ISD::SETLT:
+  case ISD::SETLE:
+    IsOrEqual = (CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE);
+    Opcode = IsReversed ? AArch64ISD::FMAX : AArch64ISD::FMIN;
+    break;
+
+  case ISD::SETUGT:
+  case ISD::SETUGE:
+    IsUnordered = true; // Intentional fall through to the shared handling.
+  case ISD::SETOGT:
+  case ISD::SETOGE:
+  case ISD::SETGT:
+  case ISD::SETGE:
+    IsOrEqual = (CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE);
+    Opcode = IsReversed ? AArch64ISD::FMIN : AArch64ISD::FMAX;
+    break;
+  }
+
+  // If LHS is NaN, an ordered comparison will be false and the result will be
+  // the RHS, but FMIN(NaN, RHS) = FMAX(NaN, RHS) = NaN. Avoid this by checking
+  // that LHS != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
+  if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
+    return SDValue();
+
+  // For xxx-or-equal comparisons, "+0 <= -0" and "-0 >= +0" will both be true,
+  // but FMIN will return -0, and FMAX will return +0. So FMIN/FMAX can only be
+  // used for unsafe math or if one of the operands is known to be nonzero.
+  if (IsOrEqual && !DAG.getTarget().Options.UnsafeFPMath &&
+      !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+    return SDValue();
+
+  return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);
+}
+
+/// Fold away an NVCAST whose result type already equals its operand's type.
+static SDValue performNVCASTCombine(SDNode *N) {
+  if (N->getValueType(0) == N->getOperand(0).getValueType())
+    return N->getOperand(0); // Same type: hand back the operand itself.
+
+  return SDValue(); // Types differ: return an empty SDValue.
+}
+
  SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
                                                   DAGCombinerInfo &DCI) const {
    SelectionDAG &DAG = DCI.DAG;
  SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
                                                   DAGCombinerInfo &DCI) const {
    SelectionDAG &DAG = DCI.DAG;
@@ -8519,7 +8957,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
      return performMulCombine(N, DAG, DCI, Subtarget);
    case ISD::SINT_TO_FP:
    case ISD::UINT_TO_FP:
      return performMulCombine(N, DAG, DCI, Subtarget);
    case ISD::SINT_TO_FP:
    case ISD::UINT_TO_FP:
-    return performIntToFpCombine(N, DAG);
+    return performIntToFpCombine(N, DAG, Subtarget);
    case ISD::OR:
      return performORCombine(N, DCI, Subtarget);
    case ISD::INTRINSIC_WO_CHAIN:
    case ISD::OR:
      return performORCombine(N, DCI, Subtarget);
    case ISD::INTRINSIC_WO_CHAIN:
@@ -8533,9 +8971,11 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
    case ISD::CONCAT_VECTORS:
      return performConcatVectorsCombine(N, DCI, DAG);
    case ISD::SELECT:
    case ISD::CONCAT_VECTORS:
      return performConcatVectorsCombine(N, DCI, DAG);
    case ISD::SELECT:
-    return performSelectCombine(N, DAG);
+    return performSelectCombine(N, DCI);
    case ISD::VSELECT:
      return performVSelectCombine(N, DCI.DAG);
    case ISD::VSELECT:
      return performVSelectCombine(N, DCI.DAG);
+  case ISD::SELECT_CC:
+    return performSelectCCCombine(N, DCI.DAG);
    case ISD::STORE:
      return performSTORECombine(N, DCI, DAG, Subtarget);
    case AArch64ISD::BRCOND:
    case ISD::STORE:
      return performSTORECombine(N, DCI, DAG, Subtarget);
    case AArch64ISD::BRCOND:
@@ -8544,6 +8984,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
      return performCONDCombine(N, DCI, DAG, 2, 3);
    case AArch64ISD::DUP:
      return performPostLD1Combine(N, DCI, false);
      return performCONDCombine(N, DCI, DAG, 2, 3);
    case AArch64ISD::DUP:
      return performPostLD1Combine(N, DCI, false);
+  case AArch64ISD::NVCAST:
+    return performNVCASTCombine(N);
    case ISD::INSERT_VECTOR_ELT:
      return performPostLD1Combine(N, DCI, true);
    case ISD::INTRINSIC_VOID:
    case ISD::INSERT_VECTOR_ELT:
      return performPostLD1Combine(N, DCI, true);
    case ISD::INTRINSIC_VOID:
@@ -8697,17 +9139,16 @@ bool AArch64TargetLowering::getPostIndexedAddressParts(
  
  static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                                    SelectionDAG &DAG) {
  
  static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                                    SelectionDAG &DAG) {
-  if (N->getValueType(0) != MVT::i16)
-    return;
-
    SDLoc DL(N);
    SDValue Op = N->getOperand(0);
    SDLoc DL(N);
    SDValue Op = N->getOperand(0);
-  assert(Op.getValueType() == MVT::f16 &&
-         "Inconsistent bitcast? Only 16-bit types should be i16 or f16");
+
+  if (N->getValueType(0) != MVT::i16 || Op.getValueType() != MVT::f16)
+    return;
+
    Op = SDValue(
        DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
                           DAG.getUNDEF(MVT::i32), Op,
    Op = SDValue(
        DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
                           DAG.getUNDEF(MVT::i32), Op,
-                         DAG.getTargetConstant(AArch64::hsub, MVT::i32)),
+                         DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
        0);
    Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
        0);
    Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
@@ -8768,9 +9209,11 @@ bool AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
  }
  
  // For the real atomic operations, we have ldxr/stxr up to 128 bits,
  }
  
  // For the real atomic operations, we have ldxr/stxr up to 128 bits,
-bool AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
+TargetLoweringBase::AtomicRMWExpansionKind
+AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
    unsigned Size = AI->getType()->getPrimitiveSizeInBits();
    unsigned Size = AI->getType()->getPrimitiveSizeInBits();
-  return Size <= 128;
+  return Size <= 128 ? AtomicRMWExpansionKind::LLSC
+                     : AtomicRMWExpansionKind::None;
  }
  
  bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const {
  }
  
  bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const {
@@ -8830,7 +9273,7 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
      Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
      Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
      Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
      Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
      Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
      Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
-    return Builder.CreateCall3(Stxr, Lo, Hi, Addr);
+    return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
    }
  
    Intrinsic::ID Int =
    }
  
    Intrinsic::ID Int =
@@ -8838,10 +9281,10 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
    Type *Tys[] = { Addr->getType() };
    Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
  
    Type *Tys[] = { Addr->getType() };
    Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
  
-  return Builder.CreateCall2(
-      Stxr, Builder.CreateZExtOrBitCast(
-                Val, Stxr->getFunctionType()->getParamType(0)),
-      Addr);
+  return Builder.CreateCall(Stxr,
+                            {Builder.CreateZExtOrBitCast(
+                                 Val, Stxr->getFunctionType()->getParamType(0)),
+                             Addr});
  }
  
  bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
  }
  
  bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(