From bebd59c74ba55cdda62466f72c5031274c37b93b Mon Sep 17 00:00:00 2001 From: Ulrich Weigand Date: Wed, 18 Feb 2015 09:13:27 +0000 Subject: [PATCH] [SystemZ] Support all TLS access models - CodeGen part The current SystemZ back-end only supports the local-exec TLS access model. This patch adds all required CodeGen support for the other TLS models, which means in particular: - Expand initial-exec TLS accesses by loading TLS offsets from the GOT using @indntpoff relocations. - Expand general-dynamic and local-dynamic accesses by generating the appropriate calls to __tls_get_offset. Note that this routine has a non-standard ABI and requires loading the GOT pointer into %r12, so the patch also adds support for the GLOBAL_OFFSET_TABLE ISD node. - Add a new platform-specific optimization pass to remove redundant __tls_get_offset calls in the local-dynamic model (modeled after the corresponding X86 pass). - Add test cases verifying all access models and optimizations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@229654 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/CMakeLists.txt | 1 + lib/Target/SystemZ/SystemZ.h | 1 + lib/Target/SystemZ/SystemZAsmPrinter.cpp | 37 +++++ .../SystemZ/SystemZConstantPoolValue.cpp | 5 + lib/Target/SystemZ/SystemZConstantPoolValue.h | 8 +- lib/Target/SystemZ/SystemZISelLowering.cpp | 132 ++++++++++++++-- lib/Target/SystemZ/SystemZISelLowering.h | 8 + lib/Target/SystemZ/SystemZInstrInfo.h | 7 +- lib/Target/SystemZ/SystemZInstrInfo.td | 15 ++ lib/Target/SystemZ/SystemZLDCleanup.cpp | 143 ++++++++++++++++++ lib/Target/SystemZ/SystemZMCInstLower.cpp | 2 + .../SystemZ/SystemZMachineFunctionInfo.h | 8 +- lib/Target/SystemZ/SystemZOperators.td | 7 + lib/Target/SystemZ/SystemZTargetMachine.cpp | 4 + test/CodeGen/SystemZ/tls-01.ll | 6 +- test/CodeGen/SystemZ/tls-02.ll | 18 +++ test/CodeGen/SystemZ/tls-03.ll | 23 +++ test/CodeGen/SystemZ/tls-04.ll | 28 ++++ test/CodeGen/SystemZ/tls-05.ll | 15 ++ test/CodeGen/SystemZ/tls-06.ll | 
17 +++ test/CodeGen/SystemZ/tls-07.ll | 16 ++ 21 files changed, 482 insertions(+), 19 deletions(-) create mode 100644 lib/Target/SystemZ/SystemZLDCleanup.cpp create mode 100644 test/CodeGen/SystemZ/tls-02.ll create mode 100644 test/CodeGen/SystemZ/tls-03.ll create mode 100644 test/CodeGen/SystemZ/tls-04.ll create mode 100644 test/CodeGen/SystemZ/tls-05.ll create mode 100644 test/CodeGen/SystemZ/tls-06.ll create mode 100644 test/CodeGen/SystemZ/tls-07.ll diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt index 41a614d9d15..60a39129640 100644 --- a/lib/Target/SystemZ/CMakeLists.txt +++ b/lib/Target/SystemZ/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_target(SystemZCodeGen SystemZISelDAGToDAG.cpp SystemZISelLowering.cpp SystemZInstrInfo.cpp + SystemZLDCleanup.cpp SystemZLongBranch.cpp SystemZMachineFunctionInfo.cpp SystemZMCInstLower.cpp diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h index c8b95b2b2ca..5f17edb51a9 100644 --- a/lib/Target/SystemZ/SystemZ.h +++ b/lib/Target/SystemZ/SystemZ.h @@ -111,6 +111,7 @@ FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM, FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM); FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); +FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); } // end namespace llvm #endif diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp index 8b18bc16e1c..3eb13abfa4e 100644 --- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -66,6 +66,20 @@ static MCInst lowerRIEfLow(const MachineInstr *MI, unsigned Opcode) { .addImm(MI->getOperand(5).getImm()); } +static const MCSymbolRefExpr *getTLSGetOffset(MCContext &Context) { + StringRef Name = "__tls_get_offset"; + return MCSymbolRefExpr::Create(Context.GetOrCreateSymbol(Name), + 
MCSymbolRefExpr::VK_PLT, + Context); +} + +static const MCSymbolRefExpr *getGlobalOffsetTable(MCContext &Context) { + StringRef Name = "_GLOBAL_OFFSET_TABLE_"; + return MCSymbolRefExpr::Create(Context.GetOrCreateSymbol(Name), + MCSymbolRefExpr::VK_None, + Context); +} + void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { SystemZMCInstLower Lower(MF->getContext(), *this); MCInst LoweredMI; @@ -95,6 +109,26 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R1D); break; + case SystemZ::TLS_GDCALL: + LoweredMI = MCInstBuilder(SystemZ::BRASL) + .addReg(SystemZ::R14D) + .addExpr(getTLSGetOffset(MF->getContext())) + .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_TLSGD)); + break; + + case SystemZ::TLS_LDCALL: + LoweredMI = MCInstBuilder(SystemZ::BRASL) + .addReg(SystemZ::R14D) + .addExpr(getTLSGetOffset(MF->getContext())) + .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_TLSLDM)); + break; + + case SystemZ::GOT: + LoweredMI = MCInstBuilder(SystemZ::LARL) + .addReg(MI->getOperand(0).getReg()) + .addExpr(getGlobalOffsetTable(MF->getContext())); + break; + case SystemZ::IILF64: LoweredMI = MCInstBuilder(SystemZ::IILF) .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg())) @@ -172,6 +206,9 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { static MCSymbolRefExpr::VariantKind getModifierVariantKind(SystemZCP::SystemZCPModifier Modifier) { switch (Modifier) { + case SystemZCP::TLSGD: return MCSymbolRefExpr::VK_TLSGD; + case SystemZCP::TLSLDM: return MCSymbolRefExpr::VK_TLSLDM; + case SystemZCP::DTPOFF: return MCSymbolRefExpr::VK_DTPOFF; case SystemZCP::NTPOFF: return MCSymbolRefExpr::VK_NTPOFF; } llvm_unreachable("Invalid SystemCPModifier!"); diff --git a/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp index 19cec219e2d..44ea1d25f08 100644 --- 
a/lib/Target/SystemZ/SystemZConstantPoolValue.cpp +++ b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp @@ -28,6 +28,11 @@ SystemZConstantPoolValue::Create(const GlobalValue *GV, unsigned SystemZConstantPoolValue::getRelocationInfo() const { switch (Modifier) { + case SystemZCP::TLSGD: + case SystemZCP::TLSLDM: + case SystemZCP::DTPOFF: + // May require a dynamic relocation. + return 2; case SystemZCP::NTPOFF: // May require a relocation, but the relocations are always resolved // by the static linker. diff --git a/lib/Target/SystemZ/SystemZConstantPoolValue.h b/lib/Target/SystemZ/SystemZConstantPoolValue.h index 0bd8c205ea4..e5f1bb18581 100644 --- a/lib/Target/SystemZ/SystemZConstantPoolValue.h +++ b/lib/Target/SystemZ/SystemZConstantPoolValue.h @@ -19,13 +19,17 @@ class GlobalValue; namespace SystemZCP { enum SystemZCPModifier { + TLSGD, + TLSLDM, + DTPOFF, NTPOFF }; } // end namespace SystemZCP /// A SystemZ-specific constant pool value. At present, the only -/// defined constant pool values are offsets of thread-local variables -/// (written x@NTPOFF). +/// defined constant pool values are module IDs or offsets of +/// thread-local variables (written x@TLSGD, x@TLSLDM, x@DTPOFF, +/// or x@NTPOFF). 
class SystemZConstantPoolValue : public MachineConstantPoolValue { const GlobalValue *GV; SystemZCP::SystemZCPModifier Modifier; diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 4bcf78656a3..d92ad6a0054 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1830,6 +1830,52 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, return Result; } +SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, + SelectionDAG &DAG, + unsigned Opcode, + SDValue GOTOffset) const { + SDLoc DL(Node); + EVT PtrVT = getPointerTy(); + SDValue Chain = DAG.getEntryNode(); + SDValue Glue; + + // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12. + SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); + Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue); + Glue = Chain.getValue(1); + Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue); + Glue = Chain.getValue(1); + + // The first call operand is the chain and the second is the TLS symbol. + SmallVector Ops; + Ops.push_back(Chain); + Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL, + Node->getValueType(0), + 0, 0)); + + // Add argument registers to the end of the list so that they are + // known live into the call. + Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT)); + Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT)); + + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(CallingConv::C); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + + // Glue the call to the argument copies. + Ops.push_back(Glue); + + // Emit the call. 
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(Opcode, DL, NodeTys, Ops); + Glue = Chain.getValue(1); + + // Copy the return value from %r2. + return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue); +} + SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SelectionDAG &DAG) const { SDLoc DL(Node); @@ -1837,9 +1883,6 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, EVT PtrVT = getPointerTy(); TLSModel::Model model = DAG.getTarget().getTLSModel(GV); - if (model != TLSModel::LocalExec) - llvm_unreachable("only local-exec TLS mode supported"); - // The high part of the thread pointer is in access register 0. SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32, DAG.getConstant(0, MVT::i32)); @@ -1855,15 +1898,82 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, DAG.getConstant(32, PtrVT)); SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo); - // Get the offset of GA from the thread pointer. - SystemZConstantPoolValue *CPV = - SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); + // Get the offset of GA from the thread pointer, based on the TLS model. + SDValue Offset; + switch (model) { + case TLSModel::GeneralDynamic: { + // Load the GOT offset of the tls_index (module ID / per-symbol offset). + SystemZConstantPoolValue *CPV = + SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD); + + Offset = DAG.getConstantPool(CPV, PtrVT, 8); + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + Offset, MachinePointerInfo::getConstantPool(), + false, false, false, 0); + + // Call __tls_get_offset to retrieve the offset. + Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset); + break; + } + + case TLSModel::LocalDynamic: { + // Load the GOT offset of the module ID. 
+ SystemZConstantPoolValue *CPV = + SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM); + + Offset = DAG.getConstantPool(CPV, PtrVT, 8); + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + Offset, MachinePointerInfo::getConstantPool(), + false, false, false, 0); + + // Call __tls_get_offset to retrieve the module base offset. + Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset); + + // Note: The SystemZLDCleanupPass will remove redundant computations + // of the module base offset. Count total number of local-dynamic + // accesses to trigger execution of that pass. + SystemZMachineFunctionInfo* MFI = + DAG.getMachineFunction().getInfo(); + MFI->incNumLocalDynamicTLSAccesses(); + + // Add the per-symbol offset. + CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF); + + SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8); + DTPOffset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + DTPOffset, MachinePointerInfo::getConstantPool(), + false, false, false, 0); - // Force the offset into the constant pool and load it from there. - SDValue CPAddr = DAG.getConstantPool(CPV, PtrVT, 8); - SDValue Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), - CPAddr, MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset); + break; + } + + case TLSModel::InitialExec: { + // Load the offset from the GOT. + Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, + SystemZII::MO_INDNTPOFF); + Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset); + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + Offset, MachinePointerInfo::getGOT(), + false, false, false, 0); + break; + } + + case TLSModel::LocalExec: { + // Force the offset into the constant pool and load it from there. 
+ SystemZConstantPoolValue *CPV = + SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); + + Offset = DAG.getConstantPool(CPV, PtrVT, 8); + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), + Offset, MachinePointerInfo::getConstantPool(), + false, false, false, 0); + break; + } + + default: + llvm_unreachable("Unknown TLS model."); + } // Add the base and offset together. return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 3922721baf9..fa58b8f553b 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -34,6 +34,11 @@ enum { CALL, SIBCALL, + // TLS calls. Like regular calls, except operand 1 is the TLS symbol. + // (The call target is implicitly __tls_get_offset.) + TLS_GDCALL, + TLS_LDCALL, + // Wraps a TargetGlobalAddress that should be loaded using PC-relative // accesses (LARL). Operand 0 is the address. PCREL_WRAPPER, @@ -258,6 +263,9 @@ private: SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue lowerGlobalAddress(GlobalAddressSDNode *Node, SelectionDAG &DAG) const; + SDValue lowerTLSGetOffset(GlobalAddressSDNode *Node, + SelectionDAG &DAG, unsigned Opcode, + SDValue GOTOffset) const; SDValue lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SelectionDAG &DAG) const; SDValue lowerBlockAddress(BlockAddressSDNode *Node, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index d2e3f541f80..e711f892c1c 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -56,10 +56,13 @@ static inline unsigned getCompareZeroCCMask(unsigned int Flags) { // SystemZ MachineOperand target flags. enum { // Masks out the bits for the access model. 
- MO_SYMBOL_MODIFIER = (1 << 0), + MO_SYMBOL_MODIFIER = (3 << 0), // @GOT (aka @GOTENT) - MO_GOT = (1 << 0) + MO_GOT = (1 << 0), + + // @INDNTPOFF + MO_INDNTPOFF = (2 << 0) }; // Classifies a branch. enum BranchType { diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 0f752640e7d..a7f774791d4 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -249,6 +249,15 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { def CallBR : Alias<2, (outs), (ins), [(z_sibcall R1D)]>; } +// TLS calls. These will be lowered into a call to __tls_get_offset, +// with an extra relocation specifying the TLS symbol. +let isCall = 1, Defs = [R14D, CC] in { + def TLS_GDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops), + [(z_tls_gdcall tglobaltlsaddr:$I2)]>; + def TLS_LDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops), + [(z_tls_ldcall tglobaltlsaddr:$I2)]>; +} + // Define the general form of the call instructions for the asm parser. // These instructions don't hard-code %r14 as the return address register. // Allow an optional TLS marker symbol to generate TLS call relocations. @@ -588,6 +597,12 @@ let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1, [(set GR64:$R1, pcrel32:$I2)]>; } +// Load the Global Offset Table address. This will be lowered into a +// larl $R1, _GLOBAL_OFFSET_TABLE_ +// instruction. 
+def GOT : Alias<6, (outs GR64:$R1), (ins), + [(set GR64:$R1, (global_offset_table))]>; + //===----------------------------------------------------------------------===// // Absolute and Negation //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZLDCleanup.cpp b/lib/Target/SystemZ/SystemZLDCleanup.cpp new file mode 100644 index 00000000000..24165be29ae --- /dev/null +++ b/lib/Target/SystemZ/SystemZLDCleanup.cpp @@ -0,0 +1,143 @@ +//===-- SystemZLDCleanup.cpp - Clean up local-dynamic TLS accesses --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass combines multiple accesses to local-dynamic TLS variables so that +// the TLS base address for the module is only fetched once per execution path +// through the function. 
+// +//===----------------------------------------------------------------------===// + +#include "SystemZTargetMachine.h" +#include "SystemZMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +namespace { + +class SystemZLDCleanup : public MachineFunctionPass { +public: + static char ID; + SystemZLDCleanup(const SystemZTargetMachine &tm) + : MachineFunctionPass(ID), TII(nullptr), MF(nullptr) {} + + const char *getPassName() const override { + return "SystemZ Local Dynamic TLS Access Clean-up"; + } + + bool runOnMachineFunction(MachineFunction &MF) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg); + MachineInstr *ReplaceTLSCall(MachineInstr *I, unsigned TLSBaseAddrReg); + MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg); + + const SystemZInstrInfo *TII; + MachineFunction *MF; +}; + +char SystemZLDCleanup::ID = 0; + +} // end anonymous namespace + +FunctionPass *llvm::createSystemZLDCleanupPass(SystemZTargetMachine &TM) { + return new SystemZLDCleanup(TM); +} + +void SystemZLDCleanup::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool SystemZLDCleanup::runOnMachineFunction(MachineFunction &F) { + TII = static_cast(F.getSubtarget().getInstrInfo()); + MF = &F; + + SystemZMachineFunctionInfo* MFI = F.getInfo(); + if (MFI->getNumLocalDynamicTLSAccesses() < 2) { + // No point folding accesses if there isn't at least two. 
+ return false; + } + + MachineDominatorTree *DT = &getAnalysis(); + return VisitNode(DT->getRootNode(), 0); +} + +// Visit the dominator subtree rooted at Node in pre-order. +// If TLSBaseAddrReg is nonzero, then use that to replace any +// TLS_LDCALL instructions. Otherwise, create the register +// when the first such instruction is seen, and then use it +// as we encounter more instructions. +bool SystemZLDCleanup::VisitNode(MachineDomTreeNode *Node, + unsigned TLSBaseAddrReg) { + MachineBasicBlock *BB = Node->getBlock(); + bool Changed = false; + + // Traverse the current block. + for (auto I = BB->begin(), E = BB->end(); I != E; ++I) { + switch (I->getOpcode()) { + case SystemZ::TLS_LDCALL: + if (TLSBaseAddrReg) + I = ReplaceTLSCall(I, TLSBaseAddrReg); + else + I = SetRegister(I, &TLSBaseAddrReg); + Changed = true; + break; + default: + break; + } + } + + // Visit the children of this block in the dominator tree. + for (auto I = Node->begin(), E = Node->end(); I != E; ++I) + Changed |= VisitNode(*I, TLSBaseAddrReg); + + return Changed; +} + +// Replace the TLS_LDCALL instruction I with a copy from TLSBaseAddrReg, +// returning the new instruction. +MachineInstr *SystemZLDCleanup::ReplaceTLSCall(MachineInstr *I, + unsigned TLSBaseAddrReg) { + // Insert a Copy from TLSBaseAddrReg to R2. + MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(), + TII->get(TargetOpcode::COPY), SystemZ::R2D) + .addReg(TLSBaseAddrReg); + + // Erase the TLS_LDCALL instruction. + I->eraseFromParent(); + + return Copy; +} + +// Create a virtual register in *TLSBaseAddrReg, and populate it by +// inserting a copy instruction after I. Returns the new instruction. +MachineInstr *SystemZLDCleanup::SetRegister(MachineInstr *I, + unsigned *TLSBaseAddrReg) { + // Create a virtual register for the TLS base address. 
+ MachineRegisterInfo &RegInfo = MF->getRegInfo(); + *TLSBaseAddrReg = RegInfo.createVirtualRegister(&SystemZ::GR64BitRegClass); + + // Insert a copy from R2 to TLSBaseAddrReg. + MachineInstr *Next = I->getNextNode(); + MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(), + TII->get(TargetOpcode::COPY), *TLSBaseAddrReg) + .addReg(SystemZ::R2D); + + return Copy; +} + diff --git a/lib/Target/SystemZ/SystemZMCInstLower.cpp b/lib/Target/SystemZ/SystemZMCInstLower.cpp index df561e2d800..6bb96f1b40f 100644 --- a/lib/Target/SystemZ/SystemZMCInstLower.cpp +++ b/lib/Target/SystemZ/SystemZMCInstLower.cpp @@ -22,6 +22,8 @@ static MCSymbolRefExpr::VariantKind getVariantKind(unsigned Flags) { return MCSymbolRefExpr::VK_None; case SystemZII::MO_GOT: return MCSymbolRefExpr::VK_GOT; + case SystemZII::MO_INDNTPOFF: + return MCSymbolRefExpr::VK_INDNTPOFF; } llvm_unreachable("Unrecognised MO_ACCESS_MODEL"); } diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h index 92c2ce7324a..34fc36d6bf6 100644 --- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h +++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -23,11 +23,13 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo { unsigned VarArgsFrameIndex; unsigned RegSaveFrameIndex; bool ManipulatesSP; + unsigned NumLocalDynamics; public: explicit SystemZMachineFunctionInfo(MachineFunction &MF) : LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0), - VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false) {} + VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false), + NumLocalDynamics(0) {} // Get and set the first call-saved GPR that should be saved and restored // by this function. This is 0 if no GPRs need to be saved or restored. @@ -61,6 +63,10 @@ public: // e.g. through STACKSAVE or STACKRESTORE. 
bool getManipulatesSP() const { return ManipulatesSP; } void setManipulatesSP(bool MSP) { ManipulatesSP = MSP; } + + // Count number of local-dynamic TLS symbols used. + unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; } + void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; } }; } // end namespace llvm diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index c70e662db42..51ac5daad54 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -90,6 +90,7 @@ def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart, def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd, [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>; +def global_offset_table : SDNode<"ISD::GLOBAL_OFFSET_TABLE", SDTPtrLeaf>; // Nodes for SystemZISD::*. See SystemZISelLowering.h for more details. def z_retflag : SDNode<"SystemZISD::RET_FLAG", SDTNone, @@ -100,6 +101,12 @@ def z_call : SDNode<"SystemZISD::CALL", SDT_ZCall, def z_sibcall : SDNode<"SystemZISD::SIBCALL", SDT_ZCall, [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>; +def z_tls_gdcall : SDNode<"SystemZISD::TLS_GDCALL", SDT_ZCall, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, + SDNPVariadic]>; +def z_tls_ldcall : SDNode<"SystemZISD::TLS_LDCALL", SDT_ZCall, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue, + SDNPVariadic]>; def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>; def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET", SDT_ZWrapOffset, []>; diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index 9b8dc43d94f..73198b1af0a 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -61,6 +61,10 @@ void SystemZPassConfig::addIRPasses() { bool SystemZPassConfig::addInstSelector() { addPass(createSystemZISelDag(getSystemZTargetMachine(), getOptLevel())); + + if (getOptLevel() != 
CodeGenOpt::None) + addPass(createSystemZLDCleanupPass(getSystemZTargetMachine())); + return false; } diff --git a/test/CodeGen/SystemZ/tls-01.ll b/test/CodeGen/SystemZ/tls-01.ll index 16bc8f6e500..da7176c0599 100644 --- a/test/CodeGen/SystemZ/tls-01.ll +++ b/test/CodeGen/SystemZ/tls-01.ll @@ -1,7 +1,7 @@ -; Test initial-exec TLS accesses. +; Test local-exec TLS accesses. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-MAIN -; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-CP +; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-MAIN +; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-CP @x = thread_local global i32 0 diff --git a/test/CodeGen/SystemZ/tls-02.ll b/test/CodeGen/SystemZ/tls-02.ll new file mode 100644 index 00000000000..15918d08a93 --- /dev/null +++ b/test/CodeGen/SystemZ/tls-02.ll @@ -0,0 +1,18 @@ +; Test initial-exec TLS accesses. +; +; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-MAIN + +@x = thread_local(initialexec) global i32 0 + +; The offset must be loaded from the GOT. This TLS access model does +; not use literal pool constants. +define i32 *@foo() { +; CHECK-MAIN-LABEL: foo: +; CHECK-MAIN: ear [[HIGH:%r[0-5]]], %a0 +; CHECK-MAIN: sllg %r2, [[HIGH]], 32 +; CHECK-MAIN: ear %r2, %a1 +; CHECK-MAIN: larl %r1, x@INDNTPOFF +; CHECK-MAIN: ag %r2, 0(%r1) +; CHECK-MAIN: br %r14 + ret i32 *@x +} diff --git a/test/CodeGen/SystemZ/tls-03.ll b/test/CodeGen/SystemZ/tls-03.ll new file mode 100644 index 00000000000..c9f7bd63290 --- /dev/null +++ b/test/CodeGen/SystemZ/tls-03.ll @@ -0,0 +1,23 @@ +; Test general-dynamic TLS accesses. 
+; +; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-MAIN +; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-CP + +@x = thread_local global i32 0 + +; Call __tls_get_offset to retrieve the symbol's TLS offset. +define i32 *@foo() { +; CHECK-CP: .LCP{{.*}}: +; CHECK-CP: .quad x@TLSGD +; +; CHECK-MAIN-LABEL: foo: +; CHECK-MAIN-DAG: larl %r12, _GLOBAL_OFFSET_TABLE_ +; CHECK-MAIN-DAG: lgrl %r2, .LCP{{.*}} +; CHECK-MAIN: brasl %r14, __tls_get_offset@PLT:tls_gdcall:x +; CHECK-MAIN: ear [[HIGH:%r[0-5]]], %a0 +; CHECK-MAIN: sllg [[TP:%r[0-5]]], [[HIGH]], 32 +; CHECK-MAIN: ear [[TP]], %a1 +; CHECK-MAIN: agr %r2, [[TP]] +; CHECK-MAIN: br %r14 + ret i32 *@x +} diff --git a/test/CodeGen/SystemZ/tls-04.ll b/test/CodeGen/SystemZ/tls-04.ll new file mode 100644 index 00000000000..dcb210a7127 --- /dev/null +++ b/test/CodeGen/SystemZ/tls-04.ll @@ -0,0 +1,28 @@ +; Test local-dynamic TLS accesses. +; +; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-MAIN +; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-CP + +@x = thread_local(localdynamic) global i32 0 + +; Call __tls_get_offset to retrieve the module's TLS base offset. +; Add the per-symbol offset and the thread pointer. 
+define i32 *@foo() { +; CHECK-CP: .LCP{{.*}}_0: +; CHECK-CP: .quad x@TLSLDM +; CHECK-CP: .LCP{{.*}}_1: +; CHECK-CP: .quad x@DTPOFF +; +; CHECK-MAIN-LABEL: foo: +; CHECK-MAIN-DAG: larl %r12, _GLOBAL_OFFSET_TABLE_ +; CHECK-MAIN-DAG: lgrl %r2, .LCP{{.*}}_0 +; CHECK-MAIN: brasl %r14, __tls_get_offset@PLT:tls_ldcall:x +; CHECK-MAIN: larl %r1, .LCP{{.*}}_1 +; CHECK-MAIN: ag %r2, 0(%r1) +; CHECK-MAIN: ear [[HIGH:%r[0-5]]], %a0 +; CHECK-MAIN: sllg [[TP:%r[0-5]]], [[HIGH]], 32 +; CHECK-MAIN: ear [[TP]], %a1 +; CHECK-MAIN: agr %r2, [[TP]] +; CHECK-MAIN: br %r14 + ret i32 *@x +} diff --git a/test/CodeGen/SystemZ/tls-05.ll b/test/CodeGen/SystemZ/tls-05.ll new file mode 100644 index 00000000000..385208d98ca --- /dev/null +++ b/test/CodeGen/SystemZ/tls-05.ll @@ -0,0 +1,15 @@ +; Test general-dynamic TLS access optimizations. +; +; If we access the same TLS variable twice, there should only be +; a single call to __tls_get_offset. +; +; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "__tls_get_offset" | count 1 + +@x = thread_local global i32 0 + +define i32 @foo() { + %val = load i32* @x + %inc = add nsw i32 %val, 1 + store i32 %inc, i32* @x + ret i32 %val +} diff --git a/test/CodeGen/SystemZ/tls-06.ll b/test/CodeGen/SystemZ/tls-06.ll new file mode 100644 index 00000000000..fcd8614a793 --- /dev/null +++ b/test/CodeGen/SystemZ/tls-06.ll @@ -0,0 +1,17 @@ +; Test general-dynamic TLS access optimizations. +; +; If we access two different TLS variables, we need two calls to +; __tls_get_offset, but should load _GLOBAL_OFFSET_TABLE only once. 
+; +; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "__tls_get_offset" | count 2 +; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "_GLOBAL_OFFSET_TABLE_" | count 1 + +@x = thread_local global i32 0 +@y = thread_local global i32 0 + +define i32 @foo() { + %valx = load i32* @x + %valy = load i32* @y + %add = add nsw i32 %valx, %valy + ret i32 %add +} diff --git a/test/CodeGen/SystemZ/tls-07.ll b/test/CodeGen/SystemZ/tls-07.ll new file mode 100644 index 00000000000..65475159230 --- /dev/null +++ b/test/CodeGen/SystemZ/tls-07.ll @@ -0,0 +1,16 @@ +; Test local-dynamic TLS access optimizations. +; +; If we access two different local-dynamic TLS variables, we only +; need a single call to __tls_get_offset. +; +; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "__tls_get_offset" | count 1 + +@x = thread_local(localdynamic) global i32 0 +@y = thread_local(localdynamic) global i32 0 + +define i32 @foo() { + %valx = load i32* @x + %valy = load i32* @y + %add = add nsw i32 %valx, %valy + ret i32 %add +} -- 2.34.1