From 928410cd128d16105d07309906f926ca4de3de96 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 7 Jan 2016 09:03:03 +0000 Subject: [PATCH] ARM: support TLS accesses on Darwin platforms Darwin TLS accesses most closely resemble ELF's general-dynamic situation, since they have to be able to handle all possible situations. The descriptors and so on are obviously slightly different though. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@257039 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 8 ++ lib/Target/ARM/ARMBaseRegisterInfo.h | 1 + lib/Target/ARM/ARMCallingConv.td | 4 + lib/Target/ARM/ARMFastISel.cpp | 2 +- lib/Target/ARM/ARMISelDAGToDAG.cpp | 15 ++- lib/Target/ARM/ARMISelLowering.cpp | 72 ++++++++++- lib/Target/ARM/ARMISelLowering.h | 2 + lib/Target/ARM/ARMInstrInfo.td | 21 ++++ lib/Target/ARM/ARMInstrThumb.td | 8 ++ lib/Target/ARM/ARMInstrThumb2.td | 7 ++ test/CodeGen/ARM/darwin-tls.ll | 165 +++++++++++++++++++++++++ 11 files changed, 297 insertions(+), 8 deletions(-) create mode 100644 test/CodeGen/ARM/darwin-tls.ll diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 419717c85a7..adc6b6b1894 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -105,6 +105,14 @@ ARMBaseRegisterInfo::getNoPreservedMask() const { return CSR_NoRegs_RegMask; } +const uint32_t * +ARMBaseRegisterInfo::getTLSCallPreservedMask(const MachineFunction &MF) const { + assert(MF.getSubtarget<ARMSubtarget>().isTargetDarwin() && + "only know about special TLS call on Darwin"); + return CSR_iOS_TLSCall_RegMask; +} + + const uint32_t * ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index cea8b80c782..e2335b0480e 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -95,6 +95,7 @@ 
public: const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override; const uint32_t *getNoPreservedMask() const override; + const uint32_t *getTLSCallPreservedMask(const MachineFunction &MF) const; /// getThisReturnPreservedMask - Returns a call preserved mask specific to the /// case that 'returned' is on an i32 first argument if the calling convention diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 23351641514..22ea166d540 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -225,6 +225,10 @@ def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS_ThisReturn, R9))>; +def CSR_iOS_TLSCall : CalleeSavedRegs<(add LR, SP, + (sequence "R%u", 12, 1), + (sequence "D%u", 31, 0))>; + // The "interrupt" attribute is used to generate code that is acceptable in // exception-handlers of various kinds. It makes us use a different return // instruction (handled elsewhere) and affects which registers we must return to diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 9bdf823c85b..b5f1ac43bc9 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -578,7 +578,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { // For now 32-bit only. 
- if (VT != MVT::i32) return 0; + if (VT != MVT::i32 || GV->isThreadLocal()) return 0; Reloc::Model RelocM = TM.getRelocationModel(); bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 024244092a3..dfbb9695947 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -622,7 +622,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, } if (N.getOpcode() == ARMISD::Wrapper && - N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && + N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { Base = N.getOperand(0); } else Base = N; @@ -801,7 +802,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = CurDAG->getTargetFrameIndex( FI, TLI->getPointerTy(CurDAG->getDataLayout())); } else if (N.getOpcode() == ARMISD::Wrapper && - N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && + N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { Base = N.getOperand(0); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -1067,7 +1069,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = CurDAG->getTargetFrameIndex( FI, TLI->getPointerTy(CurDAG->getDataLayout())); } else if (N.getOpcode() == ARMISD::Wrapper && - N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && + N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { Base = N.getOperand(0); } Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), @@ -1186,7 +1189,8 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, if (N.getOpcode() == ISD::ADD) { return false; // We want to select register offset instead } else if (N.getOpcode() == ARMISD::Wrapper && - N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { + 
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && + N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { Base = N.getOperand(0); } else { Base = N; @@ -1292,7 +1296,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, } if (N.getOpcode() == ARMISD::Wrapper && - N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { + N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && + N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { Base = N.getOperand(0); if (Base.getOpcode() == ISD::TargetConstantPool) return false; // We want to select t2LDRpci instead. diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 9cfb06b00c4..d49f46004d1 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -2530,6 +2530,72 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); } +/// \brief Convert a TLS address reference into the correct sequence of loads +/// and calls to compute the variable's address for Darwin, and return an +/// SDValue containing the final node. + +/// Darwin only has one TLS scheme which must be capable of dealing with the +/// fully general situation, in the worst case. This means: +/// + "extern __thread" declaration. +/// + Defined in a possibly unknown dynamic library. +/// +/// The general system is that each __thread variable has a [3 x i32] descriptor +/// which contains information used by the runtime to calculate the address. The +/// only part of this the compiler needs to know about is the first word, which +/// contains a function pointer that must be called with the address of the +/// entire descriptor in "r0". +/// +/// Since this descriptor may be in a different unit, in general access must +/// proceed along the usual ARM rules. 
A common sequence to produce is: +/// +/// movw rT1, :lower16:_var$non_lazy_ptr +/// movt rT1, :upper16:_var$non_lazy_ptr +/// ldr r0, [rT1] +/// ldr rT2, [r0] +/// blx rT2 +/// [...address now in r0...] +SDValue +ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op, + SelectionDAG &DAG) const { + assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin"); + SDLoc DL(Op); + + // First step is to get the address of the actual global symbol. This is where + // the TLS descriptor lives. + SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG); + + // The first entry in the descriptor is a function pointer that we must call + // to obtain the address of the variable. + SDValue Chain = DAG.getEntryNode(); + SDValue FuncTLVGet = + DAG.getLoad(MVT::i32, DL, Chain, DescAddr, + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, true, true, 4); + Chain = FuncTLVGet.getValue(1); + + MachineFunction &F = DAG.getMachineFunction(); + MachineFrameInfo *MFI = F.getFrameInfo(); + MFI->setAdjustsStack(true); + + // TLS calls preserve all registers except those that absolutely must be + // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be + // silly). + auto TRI = + getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo(); + auto ARI = static_cast<const ARMBaseRegisterInfo *>(TRI); + const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction()); + + // Finally, we can make the call. This is just a degenerate version of a + // normal ARM call node: r0 takes the address of the descriptor, and + // returns the address of the variable in this thread. 
+ Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue()); + Chain = + DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue), + Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32), + DAG.getRegisterMask(Mask), Chain.getValue(1)); + return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1)); +} + // Lower ISD::GlobalTLSAddress using the "general dynamic" model SDValue ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, @@ -2631,9 +2697,11 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, SDValue ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { + if (Subtarget->isTargetDarwin()) + return LowerGlobalTLSAddressDarwin(Op, DAG); + // TODO: implement the "local dynamic" model - assert(Subtarget->isTargetELF() && - "TLS not implemented for non-ELF targets"); + assert(Subtarget->isTargetELF() && "Only ELF implemented here"); GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); if (DAG.getTarget().Options.EmulatedTLS) return LowerToTLSEmulatedModel(GA, DAG); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index b764624f149..19aac816498 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -526,6 +526,8 @@ namespace llvm { SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA, SelectionDAG &DAG, TLSModel::Model model) const; + SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index b9de83bfe6d..c446ba3109e 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -5398,6 +5398,27 @@ def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), 
Requires<[IsARM, UseMovt]>; } // isReMaterializable +// The many different faces of TLS access. +def : ARMPat<(ARMWrapper tglobaltlsaddr :$dst), + (MOVi32imm tglobaltlsaddr :$dst)>, + Requires<[IsARM, UseMovt]>; + +def : Pat<(ARMWrapper tglobaltlsaddr:$src), + (LDRLIT_ga_abs tglobaltlsaddr:$src)>, + Requires<[IsARM, DontUseMovt]>; + +def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr), + (MOV_ga_pcrel tglobaltlsaddr:$addr)>, Requires<[IsARM, UseMovt]>; + +def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr), + (LDRLIT_ga_pcrel tglobaltlsaddr:$addr)>, + Requires<[IsARM, DontUseMovt]>; +let AddedComplexity = 10 in +def : Pat<(load (ARMWrapperPIC tglobaltlsaddr:$addr)), + (MOV_ga_pcrel_ldr tglobaltlsaddr:$addr)>, + Requires<[IsARM, UseMovt]>; + + // ConstantPool, GlobalAddress, and JumpTable def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>; def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>, diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index df6f2430635..5b1f9a06442 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -1366,6 +1366,14 @@ def tLDRLIT_ga_abs : PseudoInst<(outs tGPR:$dst), (ins i32imm:$src), (ARMWrapper tglobaladdr:$src))]>, Requires<[IsThumb, DontUseMovt]>; +// TLS globals +def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr), + (tLDRLIT_ga_pcrel tglobaltlsaddr:$addr)>, + Requires<[IsThumb, DontUseMovt]>; +def : Pat<(ARMWrapper tglobaltlsaddr:$addr), + (tLDRLIT_ga_abs tglobaltlsaddr:$addr)>, + Requires<[IsThumb, DontUseMovt]>; + // JumpTable def : T1Pat<(ARMWrapperJT tjumptable:$dst), diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index d460d33fa0a..f42f4569b2f 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3875,6 +3875,13 @@ def t2MOV_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr), } +def : T2Pat<(ARMWrapperPIC tglobaltlsaddr :$dst), + (t2MOV_ga_pcrel 
tglobaltlsaddr:$dst)>, + Requires<[IsThumb2, UseMovt]>; +def : T2Pat<(ARMWrapper tglobaltlsaddr:$dst), + (t2MOVi32imm tglobaltlsaddr:$dst)>, + Requires<[IsThumb2, UseMovt]>; + // ConstantPool, GlobalAddress, and JumpTable def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>; def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>, diff --git a/test/CodeGen/ARM/darwin-tls.ll b/test/CodeGen/ARM/darwin-tls.ll new file mode 100644 index 00000000000..e1995322202 --- /dev/null +++ b/test/CodeGen/ARM/darwin-tls.ll @@ -0,0 +1,165 @@ +; RUN: llc -mtriple=thumbv7s-apple-ios7.0 -o - -fast-isel %s | FileCheck %s --check-prefix=T2-MOVT-PIC +; RUN: llc -mtriple=thumbv7s-apple-ios7.0 -o - %s -mattr=+no-movt | FileCheck %s --check-prefix=T2-LIT-PIC +; RUN: llc -mtriple=thumbv7s-apple-ios7.0 -o - %s -relocation-model=static | FileCheck %s --check-prefix=T2-MOVT-STATIC +; RUN: llc -mtriple=thumbv7s-apple-ios7.0 -o - %s -mattr=+no-movt -relocation-model=static | FileCheck %s --check-prefix=T2-LIT-STATIC +; RUN: llc -mtriple=armv7s-apple-ios7.0 -o - %s | FileCheck %s --check-prefix=ARM-MOVT-PIC +; RUN: llc -mtriple=armv7s-apple-ios7.0 -o - %s -mattr=+no-movt | FileCheck %s --check-prefix=ARM-LIT-PIC +; RUN: llc -mtriple=armv7s-apple-ios7.0 -o - %s -relocation-model=static | FileCheck %s --check-prefix=ARM-MOVT-STATIC +; RUN: llc -mtriple=armv7s-apple-ios7.0 -o - %s -mattr=+no-movt -relocation-model=static | FileCheck %s --check-prefix=ARM-LIT-STATIC + + +@local_tls_var = thread_local global i32 0 +@external_tls_var = external thread_local global i32 + +define i32 @test_local_tls() { +; T2-MOVT-PIC-LABEL: test_local_tls: +; T2-MOVT-PIC: movw r0, :lower16:(_local_tls_var-([[PCREL_LOC:LPC[0-9]+_[0-9]+]]+4)) +; T2-MOVT-PIC: movt r0, :upper16:(_local_tls_var-([[PCREL_LOC]]+4)) +; T2-MOVT-PIC: [[PCREL_LOC]]: +; T2-MOVT-PIC-NEXT: add r0, pc +; T2-MOVT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0] +; T2-MOVT-PIC: blx [[TLV_GET_ADDR]] +; T2-MOVT-PIC: ldr r0, 
[r0] + +; T2-LIT-PIC-LABEL: test_local_tls: +; T2-LIT-PIC: ldr r0, [[LOCAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]] +; T2-LIT-PIC: [[PCREL_LOC:LPC[0-9]+_[0-9]+]]: +; T2-LIT-PIC-NEXT: add r0, pc +; T2-LIT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0] +; T2-LIT-PIC: blx [[TLV_GET_ADDR]] +; T2-LIT-PIC: ldr r0, [r0] +; T2-LIT-PIC: [[LOCAL_VAR_ADDR]]: +; T2-LIT-PIC-NEXT: .long _local_tls_var-([[PCREL_LOC]]+4) + +; T2-MOVT-STATIC-LABEL: test_local_tls: +; T2-MOVT-STATIC: movw r0, :lower16:_local_tls_var +; T2-MOVT-STATIC: movt r0, :upper16:_local_tls_var +; T2-MOVT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0] +; T2-MOVT-STATIC: blx [[TLV_GET_ADDR]] +; T2-MOVT-STATIC: ldr r0, [r0] + +; T2-LIT-STATIC-LABEL: test_local_tls: +; T2-LIT-STATIC: ldr r0, [[LOCAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]] +; T2-LIT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0] +; T2-LIT-STATIC: blx [[TLV_GET_ADDR]] +; T2-LIT-STATIC: ldr r0, [r0] +; T2-LIT-STATIC: [[LOCAL_VAR_ADDR]]: +; T2-LIT-STATIC-NEXT: .long _local_tls_var + +; ARM-MOVT-PIC-LABEL: test_local_tls: +; ARM-MOVT-PIC: movw [[VARPC1:r[0-9]+]], :lower16:(_local_tls_var-([[PCREL_LOC1:LPC[0-9]+_[0-9]+]]+8)) +; ARM-MOVT-PIC: movt [[VARPC1]], :upper16:(_local_tls_var-([[PCREL_LOC1]]+8)) +; ARM-MOVT-PIC: [[PCREL_LOC1]]: +; ARM-MOVT-PIC: add r0, pc, [[VARPC1]] +; ARM-MOVT-PIC: movw [[VARPC2:r[0-9]+]], :lower16:(_local_tls_var-([[PCREL_LOC2:LPC[0-9]+_[0-9]+]]+8)) +; ARM-MOVT-PIC: movt [[VARPC2]], :upper16:(_local_tls_var-([[PCREL_LOC2]]+8)) +; ARM-MOVT-PIC: [[PCREL_LOC2]]: +; ARM-MOVT-PIC-NEXT: ldr [[TLV_GET_ADDR:r[0-9]+]], [pc, [[VARPC2]]] +; ARM-MOVT-PIC: blx [[TLV_GET_ADDR]] +; ARM-MOVT-PIC: ldr r0, [r0] + +; ARM-LIT-PIC-LABEL: test_local_tls: +; ARM-LIT-PIC: ldr r0, [[LOCAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]] +; ARM-LIT-PIC: [[PCREL_LOC:LPC[0-9]+_[0-9]+]]: +; ARM-LIT-PIC-NEXT: add r0, pc +; ARM-LIT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0] +; ARM-LIT-PIC: blx [[TLV_GET_ADDR]] +; ARM-LIT-PIC: ldr r0, [r0] +; ARM-LIT-PIC: [[LOCAL_VAR_ADDR]]: +; ARM-LIT-PIC-NEXT: .long 
_local_tls_var-([[PCREL_LOC]]+8) + +; ARM-MOVT-STATIC-LABEL: test_local_tls: +; ARM-MOVT-STATIC: movw r0, :lower16:_local_tls_var +; ARM-MOVT-STATIC: movt r0, :upper16:_local_tls_var +; ARM-MOVT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0] +; ARM-MOVT-STATIC: blx [[TLV_GET_ADDR]] +; ARM-MOVT-STATIC: ldr r0, [r0] + +; ARM-LIT-STATIC-LABEL: test_local_tls: +; ARM-LIT-STATIC: ldr r0, [[LOCAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]] +; ARM-LIT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0] +; ARM-LIT-STATIC: blx [[TLV_GET_ADDR]] +; ARM-LIT-STATIC: ldr r0, [r0] +; ARM-LIT-STATIC: [[LOCAL_VAR_ADDR]]: +; ARM-LIT-STATIC-NEXT: .long _local_tls_var + + + %val = load i32, i32* @local_tls_var, align 4 + ret i32 %val +} + +define i32 @test_external_tls() { +; T2-MOVT-PIC-LABEL: test_external_tls: +; T2-MOVT-PIC: movw r[[EXTGOT:[0-9]+]], :lower16:(L_external_tls_var$non_lazy_ptr-([[PCREL_LOC:LPC[0-9]+_[0-9]+]]+4)) +; T2-MOVT-PIC: movt r[[EXTGOT]], :upper16:(L_external_tls_var$non_lazy_ptr-([[PCREL_LOC]]+4)) +; T2-MOVT-PIC: [[PCREL_LOC]]: +; T2-MOVT-PIC-NEXT: add r[[EXTGOT]], pc +; T2-MOVT-PIC: ldr r0, [r[[EXTGOT]]] +; T2-MOVT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0] +; T2-MOVT-PIC: blx [[TLV_GET_ADDR]] +; T2-MOVT-PIC: ldr r0, [r0] + +; T2-LIT-PIC-LABEL: test_external_tls: +; T2-LIT-PIC: ldr r[[EXTGOT:[0-9]+]], [[EXTERNAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]] +; T2-LIT-PIC: [[PCREL_LOC:LPC[0-9]+_[0-9]+]]: +; T2-LIT-PIC-NEXT: add r[[EXTGOT]], pc +; T2-LIT-PIC: ldr r0, [r[[EXTGOT]]] +; T2-LIT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0] +; T2-LIT-PIC: blx [[TLV_GET_ADDR]] +; T2-LIT-PIC: ldr r0, [r0] +; T2-LIT-PIC: [[EXTERNAL_VAR_ADDR]]: +; T2-LIT-PIC-NEXT: .long L_external_tls_var$non_lazy_ptr-([[PCREL_LOC]]+4) + +; T2-MOVT-STATIC-LABEL: test_external_tls: +; T2-MOVT-STATIC: movw r0, :lower16:_external_tls_var +; T2-MOVT-STATIC: movt r0, :upper16:_external_tls_var +; T2-MOVT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0] +; T2-MOVT-STATIC: blx [[TLV_GET_ADDR]] +; T2-MOVT-STATIC: ldr r0, [r0] + +; T2-LIT-STATIC-LABEL: 
test_external_tls: +; T2-LIT-STATIC: ldr r0, [[EXTERNAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]] +; T2-LIT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0] +; T2-LIT-STATIC: blx [[TLV_GET_ADDR]] +; T2-LIT-STATIC: ldr r0, [r0] +; T2-LIT-STATIC: [[EXTERNAL_VAR_ADDR]]: +; T2-LIT-STATIC-NEXT: .long _external_tls_var + +; ARM-MOVT-PIC-LABEL: test_external_tls: +; ARM-MOVT-PIC: movw r[[EXTGOT:[0-9]+]], :lower16:(L_external_tls_var$non_lazy_ptr-([[PCREL_LOC:LPC[0-9]+_[0-9]+]]+8)) +; ARM-MOVT-PIC: movt r[[EXTGOT]], :upper16:(L_external_tls_var$non_lazy_ptr-([[PCREL_LOC]]+8)) +; ARM-MOVT-PIC: [[PCREL_LOC]]: +; ARM-MOVT-PIC-NEXT: ldr r0, [pc, r[[EXTGOT]]] +; ARM-MOVT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0] +; ARM-MOVT-PIC: blx [[TLV_GET_ADDR]] +; ARM-MOVT-PIC: ldr r0, [r0] + +; ARM-LIT-PIC-LABEL: test_external_tls: +; ARM-LIT-PIC: ldr r[[EXTGOT:[0-9]+]], [[EXTERNAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]] +; ARM-LIT-PIC: [[PCREL_LOC:LPC[0-9]+_[0-9]+]]: +; ARM-LIT-PIC-NEXT: add r[[EXTGOT]], pc +; ARM-LIT-PIC: ldr r0, [r[[EXTGOT]]] +; ARM-LIT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0] +; ARM-LIT-PIC: blx [[TLV_GET_ADDR]] +; ARM-LIT-PIC: ldr r0, [r0] +; ARM-LIT-PIC: [[EXTERNAL_VAR_ADDR]]: +; ARM-LIT-PIC-NEXT: .long L_external_tls_var$non_lazy_ptr-([[PCREL_LOC]]+8) + +; ARM-MOVT-STATIC-LABEL: test_external_tls: +; ARM-MOVT-STATIC: movw r0, :lower16:_external_tls_var +; ARM-MOVT-STATIC: movt r0, :upper16:_external_tls_var +; ARM-MOVT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0] +; ARM-MOVT-STATIC: blx [[TLV_GET_ADDR]] +; ARM-MOVT-STATIC: ldr r0, [r0] + +; ARM-LIT-STATIC-LABEL: test_external_tls: +; ARM-LIT-STATIC: ldr r0, [[EXTERNAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]] +; ARM-LIT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0] +; ARM-LIT-STATIC: blx [[TLV_GET_ADDR]] +; ARM-LIT-STATIC: ldr r0, [r0] +; ARM-LIT-STATIC: [[EXTERNAL_VAR_ADDR]]: +; ARM-LIT-STATIC-NEXT: .long _external_tls_var + + %val = load i32, i32* @external_tls_var, align 4 + ret i32 %val +} -- 2.34.1