Revert r185595-185596 which broke buildbots.

[oota-llvm.git] / lib / Target / AArch64 / AArch64ISelLowering.cpp
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp

index 4981fbaf713d6638e1fbc80ccc264c8bca45bfd9..dff01f722430cb34d40f83ed7a2d760bb638bf75 100644 (file)
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -39,12 +39,8 @@ static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) {
    llvm_unreachable("unknown subtarget type");
  }
  
-
  AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
-  : TargetLowering(TM, createTLOF(TM)),
-    Subtarget(&TM.getSubtarget<AArch64Subtarget>()),
-    RegInfo(TM.getRegisterInfo()),
-    Itins(TM.getInstrItineraryData()) {
+  : TargetLowering(TM, createTLOF(TM)), Itins(TM.getInstrItineraryData()) {
  
    // SIMD compares set the entire lane's bits to 1
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
@@ -59,13 +55,6 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
  
    computeRegisterProperties();
  
-  // Some atomic operations can be folded into load-acquire or store-release
-  // instructions on AArch64. It's marginally simpler to let LLVM expand
-  // everything out to a barrier and then recombine the (few) barriers we can.
-  setInsertFencesForAtomic(true);
-  setTargetDAGCombine(ISD::ATOMIC_FENCE);
-  setTargetDAGCombine(ISD::ATOMIC_STORE);
-
    // We combine OR nodes for bitfield and NEON BSL operations.
    setTargetDAGCombine(ISD::OR);
  
@@ -200,6 +189,8 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
    setOperationAction(ISD::FSIN, MVT::f32, Expand);
    setOperationAction(ISD::FSIN, MVT::f64, Expand);
  
+  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  
    // Virtually no operation on f128 is legal, but LLVM can't expand them when
    // there's a valid register class, so we need custom operations in most cases.
@@ -217,6 +208,7 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
    setOperationAction(ISD::FREM,       MVT::f128, Expand);
    setOperationAction(ISD::FRINT,      MVT::f128, Expand);
    setOperationAction(ISD::FSIN,       MVT::f128, Expand);
+  setOperationAction(ISD::FSINCOS,    MVT::f128, Expand);
    setOperationAction(ISD::FSQRT,      MVT::f128, Expand);
    setOperationAction(ISD::FSUB,       MVT::f128, Custom);
    setOperationAction(ISD::FTRUNC,     MVT::f128, Expand);
@@ -264,7 +256,7 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
    setExceptionSelectorRegister(AArch64::X1);
  }
  
-EVT AArch64TargetLowering::getSetCCResultType(EVT VT) const {
+EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
    // It's reasonably important that this value matches the "natural" legal
    // promotion from i1 for scalar types. Otherwise LegalizeTypes can get itself
    // in a twist (e.g. inserting an any_extend which then becomes i64 -> i64).
@@ -272,27 +264,34 @@ EVT AArch64TargetLowering::getSetCCResultType(EVT VT) const {
    return VT.changeVectorElementTypeToInteger();
  }
  
-static void getExclusiveOperation(unsigned Size, unsigned &ldrOpc,
-                                  unsigned &strOpc) {
-  switch (Size) {
-  default: llvm_unreachable("unsupported size for atomic binary op!");
-  case 1:
-    ldrOpc = AArch64::LDXR_byte;
-    strOpc = AArch64::STXR_byte;
-    break;
-  case 2:
-    ldrOpc = AArch64::LDXR_hword;
-    strOpc = AArch64::STXR_hword;
-    break;
-  case 4:
-    ldrOpc = AArch64::LDXR_word;
-    strOpc = AArch64::STXR_word;
-    break;
-  case 8:
-    ldrOpc = AArch64::LDXR_dword;
-    strOpc = AArch64::STXR_dword;
-    break;
-  }
+static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
+                                  unsigned &LdrOpc,
+                                  unsigned &StrOpc) {
+  static unsigned LoadBares[] = {AArch64::LDXR_byte, AArch64::LDXR_hword,
+                                 AArch64::LDXR_word, AArch64::LDXR_dword};
+  static unsigned LoadAcqs[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword,
+                                AArch64::LDAXR_word, AArch64::LDAXR_dword};
+  static unsigned StoreBares[] = {AArch64::STXR_byte, AArch64::STXR_hword,
+                                  AArch64::STXR_word, AArch64::STXR_dword};
+  static unsigned StoreRels[] = {AArch64::STLXR_byte, AArch64::STLXR_hword,
+                                 AArch64::STLXR_word, AArch64::STLXR_dword};
+
+  unsigned *LoadOps, *StoreOps;
+  if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+    LoadOps = LoadAcqs;
+  else
+    LoadOps = LoadBares;
+
+  if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+    StoreOps = StoreRels;
+  else
+    StoreOps = StoreBares;
+
+  assert(isPowerOf2_32(Size) && Size <= 8 &&
+         "unsupported size for atomic binary op!");
+
+  LdrOpc = LoadOps[Log2_32(Size)];
+  StrOpc = StoreOps[Log2_32(Size)];
  }
  
  MachineBasicBlock *
@@ -310,12 +309,13 @@ AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
    unsigned dest = MI->getOperand(0).getReg();
    unsigned ptr = MI->getOperand(1).getReg();
    unsigned incr = MI->getOperand(2).getReg();
+  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
    DebugLoc dl = MI->getDebugLoc();
  
    MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  
    unsigned ldrOpc, strOpc;
-  getExclusiveOperation(Size, ldrOpc, strOpc);
+  getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
  
    MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
@@ -394,6 +394,8 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
    unsigned dest = MI->getOperand(0).getReg();
    unsigned ptr = MI->getOperand(1).getReg();
    unsigned incr = MI->getOperand(2).getReg();
+  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
+
    unsigned oldval = dest;
    DebugLoc dl = MI->getDebugLoc();
  
@@ -408,7 +410,7 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
    }
  
    unsigned ldrOpc, strOpc;
-  getExclusiveOperation(Size, ldrOpc, strOpc);
+  getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
  
    MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
@@ -476,6 +478,7 @@ AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
    unsigned ptr     = MI->getOperand(1).getReg();
    unsigned oldval  = MI->getOperand(2).getReg();
    unsigned newval  = MI->getOperand(3).getReg();
+  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
    const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
    DebugLoc dl = MI->getDebugLoc();
  
@@ -484,7 +487,7 @@ AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
    TRCsp = Size == 8 ? &AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass;
  
    unsigned ldrOpc, strOpc;
-  getExclusiveOperation(Size, ldrOpc, strOpc);
+  getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
  
    MachineFunction *MF = BB->getParent();
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -774,6 +777,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
    case AArch64ISD::TC_RETURN:      return "AArch64ISD::TC_RETURN";
    case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
    case AArch64ISD::TLSDESCCALL:    return "AArch64ISD::TLSDESCCALL";
+  case AArch64ISD::WrapperLarge:   return "AArch64ISD::WrapperLarge";
    case AArch64ISD::WrapperSmall:   return "AArch64ISD::WrapperSmall";
  
    default:                       return NULL;
@@ -818,7 +822,7 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
  
  void
  AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
-                                           DebugLoc DL, SDValue &Chain) const {
+                                           SDLoc DL, SDValue &Chain) const {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineFrameInfo *MFI = MF.getFrameInfo();
    AArch64MachineFunctionInfo *FuncInfo
@@ -889,7 +893,7 @@ SDValue
  AArch64TargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg> &Ins,
-                                      DebugLoc dl, SelectionDAG &DAG,
+                                      SDLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals) const {
    MachineFunction &MF = DAG.getMachineFunction();
    AArch64MachineFunctionInfo *FuncInfo
@@ -1004,7 +1008,7 @@ AArch64TargetLowering::LowerReturn(SDValue Chain,
                                     CallingConv::ID CallConv, bool isVarArg,
                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
                                     const SmallVectorImpl<SDValue> &OutVals,
-                                   DebugLoc dl, SelectionDAG &DAG) const {
+                                   SDLoc dl, SelectionDAG &DAG) const {
    // CCValAssign - represent the assignment of the return value to a location.
    SmallVector<CCValAssign, 16> RVLocs;
  
@@ -1077,7 +1081,7 @@ SDValue
  AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
                                   SmallVectorImpl<SDValue> &InVals) const {
    SelectionDAG &DAG                     = CLI.DAG;
-  DebugLoc &dl                          = CLI.DL;
+  SDLoc &dl                             = CLI.DL;
    SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
    SmallVector<SDValue, 32> &OutVals     = CLI.OutVals;
    SmallVector<ISD::InputArg, 32> &Ins   = CLI.Ins;
@@ -1143,7 +1147,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
    }
  
    if (!IsSibCall)
-    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+                                 dl);
  
    SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, AArch64::XSP,
                                          getPointerTy());
@@ -1274,7 +1279,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
    // in the correct location.
    if (IsTailCall && !IsSibCall) {
      Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
-                               DAG.getIntPtrConstant(0, true), InFlag);
+                               DAG.getIntPtrConstant(0, true), InFlag, dl);
      InFlag = Chain.getValue(1);
    }
  
@@ -1328,7 +1333,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
  
      Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                                 DAG.getIntPtrConstant(CalleePopBytes, true),
-                               InFlag);
+                               InFlag, dl);
      InFlag = Chain.getValue(1);
    }
  
@@ -1340,7 +1345,7 @@ SDValue
  AArch64TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                        CallingConv::ID CallConv, bool IsVarArg,
                                        const SmallVectorImpl<ISD::InputArg> &Ins,
-                                      DebugLoc dl, SelectionDAG &DAG,
+                                      SDLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals) const {
    // Assign locations to each value returned by this call.
    SmallVector<CCValAssign, 16> RVLocs;
@@ -1529,7 +1534,7 @@ SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
          }
  
     // Build a tokenfactor for all the chains.
-   return DAG.getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other,
+   return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other,
                        &ArgChains[0], ArgChains.size());
  }
  
@@ -1562,7 +1567,7 @@ bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Val) const {
  
  SDValue AArch64TargetLowering::getSelectableIntSetCC(SDValue LHS, SDValue RHS,
                                          ISD::CondCode CC, SDValue &A64cc,
-                                        SelectionDAG &DAG, DebugLoc &dl) const {
+                                        SelectionDAG &DAG, SDLoc &dl) const {
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
      int64_t C = 0;
      EVT VT = RHSC->getValueType(0);
@@ -1655,28 +1660,37 @@ static A64CC::CondCodes FPCCToA64CC(ISD::CondCode CC,
  
  SDValue
  AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
    EVT PtrVT = getPointerTy();
    const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
  
-  assert(getTargetMachine().getCodeModel() == CodeModel::Small
-         && "Only small code model supported at the moment");
-
-  // The most efficient code is PC-relative anyway for the small memory model,
-  // so we don't need to worry about relocation model.
-  return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
-                     DAG.getTargetBlockAddress(BA, PtrVT, 0,
-                                               AArch64II::MO_NO_FLAG),
-                     DAG.getTargetBlockAddress(BA, PtrVT, 0,
-                                               AArch64II::MO_LO12),
-                     DAG.getConstant(/*Alignment=*/ 4, MVT::i32));
+  switch(getTargetMachine().getCodeModel()) {
+  case CodeModel::Small:
+    // The most efficient code is PC-relative anyway for the small memory model,
+    // so we don't need to worry about relocation model.
+    return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
+                       DAG.getTargetBlockAddress(BA, PtrVT, 0,
+                                                 AArch64II::MO_NO_FLAG),
+                       DAG.getTargetBlockAddress(BA, PtrVT, 0,
+                                                 AArch64II::MO_LO12),
+                       DAG.getConstant(/*Alignment=*/ 4, MVT::i32));
+  case CodeModel::Large:
+    return DAG.getNode(
+      AArch64ISD::WrapperLarge, DL, PtrVT,
+      DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G3),
+      DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G2_NC),
+      DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G1_NC),
+      DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G0_NC));
+  default:
+    llvm_unreachable("Only small and large code models supported now");
+  }
  }
  
  
  // (BRCOND chain, val, dest)
  SDValue
  AArch64TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    SDValue Chain = Op.getOperand(0);
    SDValue TheBit = Op.getOperand(1);
    SDValue DestBB = Op.getOperand(2);
@@ -1699,7 +1713,7 @@ AArch64TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  // (BR_CC chain, condcode, lhs, rhs, dest)
  SDValue
  AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    SDValue Chain = Op.getOperand(0);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
    SDValue LHS = Op.getOperand(2);
@@ -1785,7 +1799,7 @@ AArch64TargetLowering::LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
    CallLoweringInfo CLI(InChain, RetTy, false, false, false, false,
                      0, getLibcallCallingConv(Call), isTailCall,
                      /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
-                    Callee, Args, DAG, Op->getDebugLoc());
+                    Callee, Args, DAG, SDLoc(Op));
    std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
  
    if (!CallInfo.second.getNode())
@@ -1807,7 +1821,7 @@ AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
  
    SDValue SrcVal = Op.getOperand(0);
    return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
-                     /*isSigned*/ false, Op.getDebugLoc());
+                     /*isSigned*/ false, SDLoc(Op));
  }
  
  SDValue
@@ -1838,25 +1852,45 @@ AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
  }
  
  SDValue
-AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
-                                             SelectionDAG &DAG) const {
-  // TableGen doesn't have easy access to the CodeModel or RelocationModel, so
-  // we make that distinction here.
+AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  assert(getTargetMachine().getCodeModel() == CodeModel::Large);
+  assert(getTargetMachine().getRelocationModel() == Reloc::Static);
+
+  EVT PtrVT = getPointerTy();
+  SDLoc dl(Op);
+  const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
+  const GlobalValue *GV = GN->getGlobal();
  
-  // We support the small memory model for now.
+  SDValue GlobalAddr = DAG.getNode(
+      AArch64ISD::WrapperLarge, dl, PtrVT,
+      DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G3),
+      DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G2_NC),
+      DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G1_NC),
+      DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G0_NC));
+
+  if (GN->getOffset() != 0)
+    return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
+                       DAG.getConstant(GN->getOffset(), PtrVT));
+
+  return GlobalAddr;
+}
+
+SDValue
+AArch64TargetLowering::LowerGlobalAddressELFSmall(SDValue Op,
+                                                  SelectionDAG &DAG) const {
    assert(getTargetMachine().getCodeModel() == CodeModel::Small);
  
    EVT PtrVT = getPointerTy();
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
    const GlobalValue *GV = GN->getGlobal();
    unsigned Alignment = GV->getAlignment();
    Reloc::Model RelocM = getTargetMachine().getRelocationModel();
-
-  if (GV->isWeakForLinker() && RelocM == Reloc::Static) {
-    // Weak symbols can't use ADRP/ADD pair since they should evaluate to
-    // zero when undefined. In PIC mode the GOT can take care of this, but in
-    // absolute mode we use a constant pool load.
+  if (GV->isWeakForLinker() && GV->isDeclaration() && RelocM == Reloc::Static) {
+    // Weak undefined symbols can't use ADRP/ADD pair since they should evaluate
+    // to zero when they remain undefined. In PIC mode the GOT can take care of
+    // this, but in absolute mode we use a constant pool load.
      SDValue PoolAddr;
      PoolAddr = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
                             DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
@@ -1864,10 +1898,16 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
                             DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
                                                       AArch64II::MO_LO12),
                             DAG.getConstant(8, MVT::i32));
-    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr,
-                       MachinePointerInfo::getConstantPool(),
-                       /*isVolatile=*/ false,  /*isNonTemporal=*/ true,
-                       /*isInvariant=*/ true, 8);
+    SDValue GlobalAddr = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr,
+                                     MachinePointerInfo::getConstantPool(),
+                                     /*isVolatile=*/ false,
+                                     /*isNonTemporal=*/ true,
+                                     /*isInvariant=*/ true, 8);
+    if (GN->getOffset() != 0)
+      return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
+                         DAG.getConstant(GN->getOffset(), PtrVT));
+
+    return GlobalAddr;
    }
  
    if (Alignment == 0) {
@@ -1884,7 +1924,7 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
    }
  
    unsigned char HiFixup, LoFixup;
-  bool UseGOT = Subtarget->GVIsIndirectSymbol(GV, RelocM);
+  bool UseGOT = getSubtarget()->GVIsIndirectSymbol(GV, RelocM);
  
    if (UseGOT) {
      HiFixup = AArch64II::MO_GOT;
@@ -1917,9 +1957,25 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
    return GlobalRef;
  }
  
+SDValue
+AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
+                                             SelectionDAG &DAG) const {
+  // TableGen doesn't have easy access to the CodeModel or RelocationModel, so
+  // we make those distinctions here.
+
+  switch (getTargetMachine().getCodeModel()) {
+  case CodeModel::Small:
+    return LowerGlobalAddressELFSmall(Op, DAG);
+  case CodeModel::Large:
+    return LowerGlobalAddressELFLarge(Op, DAG);
+  default:
+    llvm_unreachable("Only small and large code models supported now");
+  }
+}
+
  SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr,
                                                  SDValue DescAddr,
-                                                DebugLoc DL,
+                                                SDLoc DL,
                                                  SelectionDAG &DAG) const {
    EVT PtrVT = getPointerTy();
  
@@ -1964,15 +2020,17 @@ SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr,
  SDValue
  AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
-  assert(Subtarget->isTargetELF() &&
+  assert(getSubtarget()->isTargetELF() &&
           "TLS not implemented for non-ELF targets");
+  assert(getTargetMachine().getCodeModel() == CodeModel::Small
+         && "TLS only supported in small memory model");
    const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  
    TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
  
    SDValue TPOff;
    EVT PtrVT = getPointerTy();
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
    const GlobalValue *GV = GA->getGlobal();
  
    SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
@@ -2073,21 +2131,34 @@ AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
  SDValue
  AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
    JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
-  DebugLoc dl = JT->getDebugLoc();
+  SDLoc dl(JT);
+  EVT PtrVT = getPointerTy();
  
    // When compiling PIC, jump tables get put in the code section so a static
    // relocation-style is acceptable for both cases.
-  return DAG.getNode(AArch64ISD::WrapperSmall, dl, getPointerTy(),
-                     DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()),
-                     DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(),
-                                            AArch64II::MO_LO12),
-                     DAG.getConstant(1, MVT::i32));
+  switch (getTargetMachine().getCodeModel()) {
+  case CodeModel::Small:
+    return DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
+                       DAG.getTargetJumpTable(JT->getIndex(), PtrVT),
+                       DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
+                                              AArch64II::MO_LO12),
+                       DAG.getConstant(1, MVT::i32));
+  case CodeModel::Large:
+    return DAG.getNode(
+      AArch64ISD::WrapperLarge, dl, PtrVT,
+      DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G3),
+      DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G2_NC),
+      DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G1_NC),
+      DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G0_NC));
+  default:
+    llvm_unreachable("Only small and large code models supported now");
+  }
  }
  
  // (SELECT_CC lhs, rhs, iftrue, iffalse, condcode)
  SDValue
  AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);
    SDValue IfTrue = Op.getOperand(2);
@@ -2143,7 +2214,7 @@ AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  // (SELECT testbit, iftrue, iffalse)
  SDValue
  AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    SDValue TheBit = Op.getOperand(0);
    SDValue IfTrue = Op.getOperand(1);
    SDValue IfFalse = Op.getOperand(2);
@@ -2165,7 +2236,7 @@ AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  // (SETCC lhs, rhs, condcode)
  SDValue
  AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
@@ -2224,7 +2295,7 @@ AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
  
    // We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes
    // rather than just 8.
-  return DAG.getMemcpy(Op.getOperand(0), Op.getDebugLoc(),
+  return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op),
                         Op.getOperand(1), Op.getOperand(2),
                         DAG.getConstant(32, MVT::i32), 8, false, false,
                         MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
@@ -2237,7 +2308,7 @@ AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
    MachineFunction &MF = DAG.getMachineFunction();
    AArch64MachineFunctionInfo *FuncInfo
      = MF.getInfo<AArch64MachineFunctionInfo>();
-  DebugLoc DL = Op.getDebugLoc();
+  SDLoc DL(Op);
  
    SDValue Chain = Op.getOperand(0);
    SDValue VAList = Op.getOperand(1);
@@ -2336,7 +2407,7 @@ static SDValue PerformANDCombine(SDNode *N,
                                   TargetLowering::DAGCombinerInfo &DCI) {
  
    SelectionDAG &DAG = DCI.DAG;
-  DebugLoc DL = N->getDebugLoc();
+  SDLoc DL(N);
    EVT VT = N->getValueType(0);
  
    // We're looking for an SRA/SHL pair which form an SBFX.
@@ -2369,84 +2440,12 @@ static SDValue PerformANDCombine(SDNode *N,
                       DAG.getConstant(LSB + Width - 1, MVT::i64));
  }
  
-static SDValue PerformATOMIC_FENCECombine(SDNode *FenceNode,
-                                         TargetLowering::DAGCombinerInfo &DCI) {
-  // An atomic operation followed by an acquiring atomic fence can be reduced to
-  // an acquiring load. The atomic operation provides a convenient pointer to
-  // load from. If the original operation was a load anyway we can actually
-  // combine the two operations into an acquiring load.
-  SelectionDAG &DAG = DCI.DAG;
-  SDValue AtomicOp = FenceNode->getOperand(0);
-  AtomicSDNode *AtomicNode = dyn_cast<AtomicSDNode>(AtomicOp);
-
-  // A fence on its own can't be optimised
-  if (!AtomicNode)
-    return SDValue();
-
-  AtomicOrdering FenceOrder
-    = static_cast<AtomicOrdering>(FenceNode->getConstantOperandVal(1));
-  SynchronizationScope FenceScope
-    = static_cast<SynchronizationScope>(FenceNode->getConstantOperandVal(2));
-
-  if (FenceOrder != Acquire || FenceScope != AtomicNode->getSynchScope())
-    return SDValue();
-
-  // If the original operation was an ATOMIC_LOAD then we'll be replacing it, so
-  // the chain we use should be its input, otherwise we'll put our store after
-  // it so we use its output chain.
-  SDValue Chain = AtomicNode->getOpcode() == ISD::ATOMIC_LOAD ?
-    AtomicNode->getChain() : AtomicOp;
-
-  // We have an acquire fence with a handy atomic operation nearby, we can
-  // convert the fence into a load-acquire, discarding the result.
-  DebugLoc DL = FenceNode->getDebugLoc();
-  SDValue Op = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, AtomicNode->getMemoryVT(),
-                             AtomicNode->getValueType(0),
-                             Chain,                  // Chain
-                             AtomicOp.getOperand(1), // Pointer
-                             AtomicNode->getMemOperand(), Acquire,
-                             FenceScope);
-
-  if (AtomicNode->getOpcode() == ISD::ATOMIC_LOAD)
-    DAG.ReplaceAllUsesWith(AtomicNode, Op.getNode());
-
-  return Op.getValue(1);
-}
-
-static SDValue PerformATOMIC_STORECombine(SDNode *N,
-                                         TargetLowering::DAGCombinerInfo &DCI) {
-  // A releasing atomic fence followed by an atomic store can be combined into a
-  // single store operation.
-  SelectionDAG &DAG = DCI.DAG;
-  AtomicSDNode *AtomicNode = cast<AtomicSDNode>(N);
-  SDValue FenceOp = AtomicNode->getOperand(0);
-
-  if (FenceOp.getOpcode() != ISD::ATOMIC_FENCE)
-    return SDValue();
-
-  AtomicOrdering FenceOrder
-    = static_cast<AtomicOrdering>(FenceOp->getConstantOperandVal(1));
-  SynchronizationScope FenceScope
-    = static_cast<SynchronizationScope>(FenceOp->getConstantOperandVal(2));
-
-  if (FenceOrder != Release || FenceScope != AtomicNode->getSynchScope())
-    return SDValue();
-
-  DebugLoc DL = AtomicNode->getDebugLoc();
-  return DAG.getAtomic(ISD::ATOMIC_STORE, DL, AtomicNode->getMemoryVT(),
-                       FenceOp.getOperand(0),  // Chain
-                       AtomicNode->getOperand(1),       // Pointer
-                       AtomicNode->getOperand(2),       // Value
-                       AtomicNode->getMemOperand(), Release,
-                       FenceScope);
-}
-
  /// For a true bitfield insert, the bits getting into that contiguous mask
  /// should come from the low part of an existing value: they must be formed from
  /// a compatible SHL operation (unless they're already low). This function
  /// checks that condition and returns the least-significant bit that's
  /// intended. If the operation is not a field preparation, -1 is returned.
-static int32_t getLSBForBFI(SelectionDAG &DAG, DebugLoc DL, EVT VT,
+static int32_t getLSBForBFI(SelectionDAG &DAG, SDLoc DL, EVT VT,
                              SDValue &MaskedVal, uint64_t Mask) {
    if (!isShiftedMask_64(Mask))
      return -1;
@@ -2462,7 +2461,7 @@ static int32_t getLSBForBFI(SelectionDAG &DAG, DebugLoc DL, EVT VT,
    // cases (e.g. bitfield to bitfield copy) may still need a real shift before
    // the BFI.
  
-  uint64_t LSB = CountTrailingZeros_64(Mask);
+  uint64_t LSB = countTrailingZeros(Mask);
    int64_t ShiftRightRequired = LSB;
    if (MaskedVal.getOpcode() == ISD::SHL &&
        isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
@@ -2522,7 +2521,7 @@ static SDValue tryCombineToBFI(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const AArch64Subtarget *Subtarget) {
    SelectionDAG &DAG = DCI.DAG;
-  DebugLoc DL = N->getDebugLoc();
+  SDLoc DL(N);
    EVT VT = N->getValueType(0);
  
    assert(N->getOpcode() == ISD::OR && "Unexpected root");
@@ -2603,7 +2602,7 @@ static SDValue tryCombineToLargerBFI(SDNode *N,
                                       TargetLowering::DAGCombinerInfo &DCI,
                                       const AArch64Subtarget *Subtarget) {
    SelectionDAG &DAG = DCI.DAG;
-  DebugLoc DL = N->getDebugLoc();
+  SDLoc DL(N);
    EVT VT = N->getValueType(0);
  
    // First job is to hunt for a MaskedBFI on either the left or right. Swap
@@ -2685,7 +2684,7 @@ static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
  static SDValue tryCombineToEXTR(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI) {
    SelectionDAG &DAG = DCI.DAG;
-  DebugLoc DL = N->getDebugLoc();
+  SDLoc DL(N);
    EVT VT = N->getValueType(0);
  
    assert(N->getOpcode() == ISD::OR && "Unexpected root");
@@ -2757,7 +2756,7 @@ static SDValue PerformSRACombine(SDNode *N,
                                   TargetLowering::DAGCombinerInfo &DCI) {
  
    SelectionDAG &DAG = DCI.DAG;
-  DebugLoc DL = N->getDebugLoc();
+  SDLoc DL(N);
    EVT VT = N->getValueType(0);
  
    // We're looking for an SRA/SHL pair which form an SBFX.
@@ -2796,9 +2795,7 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N,
    switch (N->getOpcode()) {
    default: break;
    case ISD::AND: return PerformANDCombine(N, DCI);
-  case ISD::ATOMIC_FENCE: return PerformATOMIC_FENCECombine(N, DCI);
-  case ISD::ATOMIC_STORE: return PerformATOMIC_STORECombine(N, DCI);
-  case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
+  case ISD::OR: return PerformORCombine(N, DCI, getSubtarget());
    case ISD::SRA: return PerformSRACombine(N, DCI);
    }
    return SDValue();
@@ -2899,7 +2896,7 @@ AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
    case 'S': {
      // An absolute symbolic address or label reference.
      if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
-      Result = DAG.getTargetGlobalAddress(GA->getGlobal(), Op.getDebugLoc(),
+      Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                            GA->getValueType(0));
      } else if (const BlockAddressSDNode *BA
                   = dyn_cast<BlockAddressSDNode>(Op)) {
@@ -2935,7 +2932,7 @@ AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
  std::pair<unsigned, const TargetRegisterClass*>
  AArch64TargetLowering::getRegForInlineAsmConstraint(
                                                    const std::string &Constraint,
-                                                  EVT VT) const {
+                                                  MVT VT) const {
    if (Constraint.size() == 1) {
      switch (Constraint[0]) {
      case 'r':