From 6aac92b939ff846344ae7d60bac91d03b56d8c06 Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Fri, 11 Dec 2015 18:24:30 +0000 Subject: [PATCH] CXX_FAST_TLS calling convention: target independent portion. The access function has a short entry and a short exit, the initialization block is only run the first time. To improve the performance, we want to have a short frame at the entry and exit. We explicitly handle most of the CSRs via copies. Only the CSRs that are not handled via copies will be in CSR_SaveList. Frame lowering and prologue/epilogue insertion will generate a short frame in the entry and exit according to CSR_SaveList. The majority of the CSRs will be handled by register allocator. Register allocator will try to spill and reload them in the initialization block. We add CSRsViaCopy, it will be explicitly handled during lowering. 1> we first set FunctionLoweringInfo->SplitCSR if conditions are met (the target supports it for the given calling convention and the function has only return exits). We also call TLI->initializeSplitCSR to perform initialization. 2> we call TLI->insertCopiesSplitCSR to insert copies from CSRsViaCopy to virtual registers at beginning of the entry block and copies from virtual registers to CSRsViaCopy at beginning of the exit blocks. 3> we also need to make sure the explicit copies will not be eliminated. 
rdar://problem/23557469 Differential Revision: http://reviews.llvm.org/D15340 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255353 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/FunctionLoweringInfo.h | 3 ++ include/llvm/Target/TargetLowering.h | 23 ++++++++++++ include/llvm/Target/TargetRegisterInfo.h | 5 +++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 37 ++++++++++++++++++- 4 files changed, 67 insertions(+), 1 deletion(-) diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h index bd8da736c16..09a9991912d 100644 --- a/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -62,6 +62,9 @@ public: /// registers. bool CanLowerReturn; + /// True if part of the CSRs will be handled via explicit copies. + bool SplitCSR; + /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg /// allocated to hold a pointer to the hidden sret parameter. unsigned DemoteRegister; diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index f7152565f91..a105917958d 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -2263,6 +2263,29 @@ public: return false; } + /// Return true if the target supports that a subset of CSRs for the given + /// calling convention is handled explicitly via copies. + virtual bool supportSplitCSR(CallingConv::ID CC) const { + return false; + } + + /// Perform necessary initialization to handle a subset of CSRs explicitly + /// via copies. This function is called at the beginning of instruction + /// selection. + virtual void initializeSplitCSR(MachineBasicBlock *Entry) const { + llvm_unreachable("Not Implemented"); + } + + /// Insert explicit copies in entry and exit blocks. We copy a subset of + /// CSRs to virtual registers in the entry block, and copy them back to + /// physical registers in the exit blocks. 
This function is called at the end + /// of instruction selection. + virtual void insertCopiesSplitCSR( + MachineBasicBlock *Entry, + const SmallVectorImpl<MachineBasicBlock *> &Exits) const { + llvm_unreachable("Not Implemented"); + } + //===--------------------------------------------------------------------===// // Lowering methods - These methods must be implemented by targets so that // the SelectionDAGBuilder code knows how to lower these. diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index 414255edb23..ffd510440b0 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -426,6 +426,11 @@ public: virtual const MCPhysReg* getCalleeSavedRegs(const MachineFunction *MF) const = 0; + virtual const MCPhysReg* + getCalleeSavedRegsViaCopy(const MachineFunction *MF) const { + return nullptr; + } + /// Return a mask of call-preserved registers for the given calling convention /// on the current function. The mask should include all call-preserved /// aliases. This is used by the register allocator to determine which diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index f6c5d90f47a..5f3e4a02341 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -467,15 +467,50 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MF->setHasInlineAsm(false); + FuncInfo->SplitCSR = false; + SmallVector<MachineBasicBlock*, 4> Returns; + + // We split CSR if the target supports it for the given calling convention + // and the function has only return exits. + if (TLI->supportSplitCSR(Fn.getCallingConv())) { + FuncInfo->SplitCSR = true; + + // Collect all the return blocks. 
+ for (const BasicBlock &BB : Fn) { + if (!succ_empty(&BB)) + continue; + + const TerminatorInst *Term = BB.getTerminator(); + if (isa<UnreachableInst>(Term)) + continue; + if (isa<ReturnInst>(Term)) { + Returns.push_back(FuncInfo->MBBMap[&BB]); + continue; + } + + // Bail out if the exit block is not Return nor Unreachable. + FuncInfo->SplitCSR = false; + break; + } + } + + MachineBasicBlock *EntryMBB = &MF->front(); + if (FuncInfo->SplitCSR) + // This performs initialization so lowering for SplitCSR will be correct. + TLI->initializeSplitCSR(EntryMBB); + SelectAllBasicBlocks(Fn); // If the first basic block in the function has live ins that need to be // copied into vregs, emit the copies into the top of the block before // emitting the code for the block. - MachineBasicBlock *EntryMBB = &MF->front(); const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII); + // Insert copies in the entry block and the return blocks. + if (FuncInfo->SplitCSR) + TLI->insertCopiesSplitCSR(EntryMBB, Returns); + DenseMap<unsigned, unsigned> LiveInMap; if (!FuncInfo->ArgDbgValues.empty()) for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(), -- 2.34.1