From: Lang Hames Date: Mon, 19 Oct 2015 17:43:51 +0000 (+0000) Subject: [Orc] Add support for emitting indirect stubs directly into the JIT target's X-Git-Url: http://plrg.eecs.uci.edu/git/?p=oota-llvm.git;a=commitdiff_plain;h=65040b23fba7246cdffcd79e4f5a0ab2ca5bfc48 [Orc] Add support for emitting indirect stubs directly into the JIT target's memory, rather than representing the stubs in IR. Update the CompileOnDemand layer to use this functionality. Directly emitting stubs is much cheaper than building them in IR and codegen'ing them (see below). It also plays well with remote JITing - stubs can be emitted directly in the target process, rather than having to send them over the wire. The downsides are: (1) Care must be taken when resolving symbols, as stub symbols are held in a separate symbol table. This is only a problem for layer writers and other people using this API directly. The CompileOnDemand layer hides this detail. (2) Aliases of function stubs can't be symbolic any more (since there's no symbol definition in IR), but must be converted into a constant pointer expression. This means that modules containing aliases of stubs cannot be cached. In practice this is unlikely to be a problem: There's no benefit to caching such a module anyway. On balance I think the extra performance is more than worth the trade-offs: In a simple stress test with 10000 dummy functions requiring stubs and a single executed "hello world" main function, directly emitting stubs reduced user time for JITing / executing by over 90% (1.5s for IR stubs vs 0.1s for direct emission). 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@250712 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h index ff180c21b24..62647680aa6 100644 --- a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h +++ b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h @@ -36,42 +36,27 @@ namespace orc { /// added to the layer below. When a stub is called it triggers the extraction /// of the function body from the original module. The extracted body is then /// compiled and executed. -template (Function&)>> +template class CompileOnDemandLayer { private: - // Utility class for MapValue. Only materializes declarations for global - // variables. - class GlobalDeclMaterializer : public ValueMaterializer { + template + class LambdaMaterializer : public ValueMaterializer { public: - typedef std::set StubSet; - - GlobalDeclMaterializer(Module &Dst, const StubSet *StubsToClone = nullptr) - : Dst(Dst), StubsToClone(StubsToClone) {} - + LambdaMaterializer(MaterializerFtor M) : M(std::move(M)) {} Value* materializeValueFor(Value *V) final { - if (auto *GV = dyn_cast(V)) - return cloneGlobalVariableDecl(Dst, *GV); - else if (auto *F = dyn_cast(V)) { - auto *ClonedF = cloneFunctionDecl(Dst, *F); - if (StubsToClone && StubsToClone->count(F)) { - GlobalVariable *FnBodyPtr = - createImplPointer(*ClonedF->getType(), *ClonedF->getParent(), - ClonedF->getName() + "$orc_addr", nullptr); - makeStub(*ClonedF, *FnBodyPtr); - ClonedF->setLinkage(GlobalValue::AvailableExternallyLinkage); - ClonedF->addFnAttr(Attribute::AlwaysInline); - } - return ClonedF; - } - // Else. 
- return nullptr; + return M(V); } private: - Module &Dst; - const StubSet *StubsToClone; + MaterializerFtor M; + }; + + template + LambdaMaterializer + createLambdaMaterializer(MaterializerFtor M) { + return LambdaMaterializer(std::move(M)); }; typedef typename BaseLayerT::ModuleSetHandleT BaseLayerModuleSetHandleT; @@ -79,6 +64,16 @@ private: struct LogicalModuleResources { std::shared_ptr SourceModule; std::set StubsToClone; + std::unique_ptr StubsMgr; + + JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) { + if (Name.endswith("$stub_ptr") && !ExportedSymbolsOnly) { + assert(!ExportedSymbolsOnly && "Stubs are never exported"); + return StubsMgr->findPointer(Name.drop_back(9)); + } + return StubsMgr->findStub(Name, ExportedSymbolsOnly); + } + }; struct LogicalDylibResources { @@ -94,15 +89,25 @@ private: typedef std::list LogicalDylibList; public: + /// @brief Handle to a set of loaded modules. typedef typename LogicalDylibList::iterator ModuleSetHandleT; + /// @brief Module partitioning functor. + typedef std::function(Function&)> PartitioningFtor; + + /// @brief Builder for IndirectStubsManagers. + typedef std::function()> + IndirectStubsManagerBuilderT; + /// @brief Construct a compile-on-demand layer instance. - CompileOnDemandLayer(BaseLayerT &BaseLayer, CompileCallbackMgrT &CallbackMgr, - PartitioningFtor Partition, + CompileOnDemandLayer(BaseLayerT &BaseLayer, PartitioningFtor Partition, + CompileCallbackMgrT &CallbackMgr, + IndirectStubsManagerBuilderT CreateIndirectStubsManager, bool CloneStubsIntoPartitions = true) - : BaseLayer(BaseLayer), CompileCallbackMgr(CallbackMgr), - Partition(Partition), + : BaseLayer(BaseLayer), Partition(Partition), + CompileCallbackMgr(CallbackMgr), + CreateIndirectStubsManager(std::move(CreateIndirectStubsManager)), CloneStubsIntoPartitions(CloneStubsIntoPartitions) {} /// @brief Add a module to the compile-on-demand layer. 
@@ -144,7 +149,11 @@ public: /// @param ExportedSymbolsOnly If true, search only for exported symbols. /// @return A handle for the given named symbol, if it exists. JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) { - return BaseLayer.findSymbol(Name, ExportedSymbolsOnly); + for (auto LDI = LogicalDylibs.begin(), LDE = LogicalDylibs.end(); + LDI != LDE; ++LDI) + if (auto Symbol = findSymbolIn(LDI, Name, ExportedSymbolsOnly)) + return Symbol; + return nullptr; } /// @brief Get the address of a symbol provided by this layer, or some layer @@ -165,80 +174,127 @@ private: // Create a logical module handle for SrcM within the logical dylib. auto LMH = LD.createLogicalModule(); auto &LMResources = LD.getLogicalModuleResources(LMH); + LMResources.SourceModule = SrcM; - // Create the GVs-and-stubs module. - auto GVsAndStubsM = llvm::make_unique( - (SrcM->getName() + ".globals_and_stubs").str(), - SrcM->getContext()); - GVsAndStubsM->setDataLayout(SrcM->getDataLayout()); + // Create the GlobalValues module. + const DataLayout &DL = SrcM->getDataLayout(); + auto GVsM = llvm::make_unique((SrcM->getName() + ".globals").str(), + SrcM->getContext()); + GVsM->setDataLayout(DL); + + // Create function stubs. ValueToValueMapTy VMap; + { + typename IndirectStubsMgrT::StubInitsMap StubInits; + for (auto &F : *SrcM) { + // Skip declarations. + if (F.isDeclaration()) + continue; + + // Record all functions defined by this module. + if (CloneStubsIntoPartitions) + LMResources.StubsToClone.insert(&F); + + // Create a callback, associate it with the stub for the function, + // and set the compile action to compile the partition containing the + // function. 
+ auto CCInfo = CompileCallbackMgr.getCompileCallback(SrcM->getContext()); + StubInits[mangle(F.getName(), DL)] = + std::make_pair(CCInfo.getAddress(), + JITSymbolBase::flagsFromGlobalValue(F)); + CCInfo.setCompileAction( + [this, &LD, LMH, &F]() { + return this->extractAndCompile(LD, LMH, F); + }); + } - // Process module and create stubs. - // We create the stubs before copying the global variables as we know the - // stubs won't refer to any globals (they only refer to their implementation - // pointer) so there's no ordering/value-mapping issues. - for (auto &F : *SrcM) { - - // Skip declarations. - if (F.isDeclaration()) - continue; - - // Record all functions defined by this module. - if (CloneStubsIntoPartitions) - LMResources.StubsToClone.insert(&F); - - // For each definition: create a callback, a stub, and a function body - // pointer. Initialize the function body pointer to point at the callback, - // and set the callback to compile the function body. - auto CCInfo = CompileCallbackMgr.getCompileCallback(SrcM->getContext()); - Function *StubF = cloneFunctionDecl(*GVsAndStubsM, F, &VMap); - GlobalVariable *FnBodyPtr = - createImplPointer(*StubF->getType(), *StubF->getParent(), - StubF->getName() + "$orc_addr", - createIRTypedAddress(*StubF->getFunctionType(), - CCInfo.getAddress())); - makeStub(*StubF, *FnBodyPtr); - CCInfo.setCompileAction( - [this, &LD, LMH, &F]() { - return this->extractAndCompile(LD, LMH, F); - }); + LMResources.StubsMgr = CreateIndirectStubsManager(); + auto EC = LMResources.StubsMgr->init(StubInits); + (void)EC; + // FIXME: This should be propagated back to the user. Stub creation may + // fail for remote JITs. + assert(!EC && "Error generating stubs"); } - // Now clone the global variable declarations. - GlobalDeclMaterializer GDMat(*GVsAndStubsM); + // Clone global variable decls. 
for (auto &GV : SrcM->globals()) - if (!GV.isDeclaration()) - cloneGlobalVariableDecl(*GVsAndStubsM, GV, &VMap); + if (!GV.isDeclaration() && !VMap.count(&GV)) + cloneGlobalVariableDecl(*GVsM, GV, &VMap); // And the aliases. - for (auto &Alias : SrcM->aliases()) - cloneGlobalAlias(*GVsAndStubsM, Alias, VMap, &GDMat); + for (auto &A : SrcM->aliases()) + if (!VMap.count(&A)) + cloneGlobalAliasDecl(*GVsM, A, VMap); + + // Now we need to clone the GV and alias initializers. + + // Initializers may refer to functions declared (but not defined) in this + // module. Build a materializer to clone decls on demand. + auto Materializer = createLambdaMaterializer( + [&GVsM, &LMResources](Value *V) -> Value* { + if (auto *F = dyn_cast(V)) { + // Decls in the original module just get cloned. + if (F->isDeclaration()) + return cloneFunctionDecl(*GVsM, *F); + + // Definitions in the original module (which we have emitted stubs + // for at this point) get turned into a constant alias to the stub + // instead. + const DataLayout &DL = GVsM->getDataLayout(); + std::string FName = mangle(F->getName(), DL); + auto StubSym = LMResources.StubsMgr->findStub(FName, false); + unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(F->getType()); + ConstantInt *StubAddr = + ConstantInt::get(GVsM->getContext(), + APInt(PtrBitWidth, StubSym.getAddress())); + Constant *Init = ConstantExpr::getCast(Instruction::IntToPtr, + StubAddr, F->getType()); + return GlobalAlias::create(F->getFunctionType(), + F->getType()->getAddressSpace(), + F->getLinkage(), F->getName(), + Init, GVsM.get()); + } + // else.... + return nullptr; + }); - // Then clone the initializers. + // Clone the global variable initializers. for (auto &GV : SrcM->globals()) if (!GV.isDeclaration()) - moveGlobalVariableInitializer(GV, VMap, &GDMat); + moveGlobalVariableInitializer(GV, VMap, &Materializer); + + // Clone the global alias initializers. 
+ for (auto &A : SrcM->aliases()) { + auto *NewA = cast(VMap[&A]); + assert(NewA && "Alias not cloned?"); + Value *Init = MapValue(A.getAliasee(), VMap, RF_None, nullptr, + &Materializer); + NewA->setAliasee(cast(Init)); + } - // Build a resolver for the stubs module and add it to the base layer. - auto GVsAndStubsResolver = createLambdaResolver( - [&LD](const std::string &Name) { + // Build a resolver for the globals module and add it to the base layer. + auto GVsResolver = createLambdaResolver( + [&LD, LMH](const std::string &Name) { + auto &LMResources = LD.getLogicalModuleResources(LMH); + if (auto Sym = LMResources.StubsMgr->findStub(Name, false)) + return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags()); return LD.getDylibResources().ExternalSymbolResolver(Name); }, [](const std::string &Name) { return RuntimeDyld::SymbolInfo(nullptr); }); - std::vector> GVsAndStubsMSet; - GVsAndStubsMSet.push_back(std::move(GVsAndStubsM)); - auto GVsAndStubsH = - BaseLayer.addModuleSet(std::move(GVsAndStubsMSet), + std::vector> GVsMSet; + GVsMSet.push_back(std::move(GVsM)); + auto GVsH = + BaseLayer.addModuleSet(std::move(GVsMSet), llvm::make_unique(), - std::move(GVsAndStubsResolver)); - LD.addToLogicalModule(LMH, GVsAndStubsH); + std::move(GVsResolver)); + LD.addToLogicalModule(LMH, GVsH); } - static std::string Mangle(StringRef Name, const DataLayout &DL) { + static std::string mangle(StringRef Name, const DataLayout &DL) { std::string MangledName; { raw_string_ostream MangledNameStream(MangledName); @@ -250,42 +306,35 @@ private: TargetAddress extractAndCompile(CODLogicalDylib &LD, LogicalModuleHandle LMH, Function &F) { - Module &SrcM = *LD.getLogicalModuleResources(LMH).SourceModule; + auto &LMResources = LD.getLogicalModuleResources(LMH); + Module &SrcM = *LMResources.SourceModule; // If F is a declaration we must already have compiled it. if (F.isDeclaration()) return 0; // Grab the name of the function being called here. 
- std::string CalledFnName = Mangle(F.getName(), SrcM.getDataLayout()); + std::string CalledFnName = mangle(F.getName(), SrcM.getDataLayout()); auto Part = Partition(F); auto PartH = emitPartition(LD, LMH, Part); TargetAddress CalledAddr = 0; for (auto *SubF : Part) { - std::string FName = SubF->getName(); - auto FnBodySym = - BaseLayer.findSymbolIn(PartH, Mangle(FName, SrcM.getDataLayout()), - false); - auto FnPtrSym = - BaseLayer.findSymbolIn(*LD.moduleHandlesBegin(LMH), - Mangle(FName + "$orc_addr", - SrcM.getDataLayout()), - false); + std::string FnName = mangle(SubF->getName(), SrcM.getDataLayout()); + auto FnBodySym = BaseLayer.findSymbolIn(PartH, FnName, false); assert(FnBodySym && "Couldn't find function body."); - assert(FnPtrSym && "Couldn't find function body pointer."); TargetAddress FnBodyAddr = FnBodySym.getAddress(); - void *FnPtrAddr = reinterpret_cast( - static_cast(FnPtrSym.getAddress())); // If this is the function we're calling record the address so we can // return it from this function. if (SubF == &F) CalledAddr = FnBodyAddr; - memcpy(FnPtrAddr, &FnBodyAddr, sizeof(uintptr_t)); + // Update the function body pointer for the stub. + if (auto EC = LMResources.StubsMgr->updatePointer(FnName, FnBodyAddr)) + return 0; } return CalledAddr; @@ -308,7 +357,43 @@ private: auto M = llvm::make_unique(NewName, SrcM.getContext()); M->setDataLayout(SrcM.getDataLayout()); ValueToValueMapTy VMap; - GlobalDeclMaterializer GDM(*M, &LMResources.StubsToClone); + + auto Materializer = createLambdaMaterializer( + [this, &LMResources, &M, &VMap](Value *V) -> Value* { + if (auto *GV = dyn_cast(V)) { + return cloneGlobalVariableDecl(*M, *GV); + } else if (auto *F = dyn_cast(V)) { + // Check whether we want to clone an available_externally definition. + if (LMResources.StubsToClone.count(F)) { + // Ok - we want an inlinable stub. For that to work we need a decl + // for the stub pointer. 
+ auto *StubPtr = createImplPointer(*F->getType(), *M, + F->getName() + "$stub_ptr", + nullptr); + auto *ClonedF = cloneFunctionDecl(*M, *F); + makeStub(*ClonedF, *StubPtr); + ClonedF->setLinkage(GlobalValue::AvailableExternallyLinkage); + ClonedF->addFnAttr(Attribute::AlwaysInline); + return ClonedF; + } + + return cloneFunctionDecl(*M, *F); + } else if (auto *A = dyn_cast(V)) { + auto *PTy = cast(A->getType()); + if (PTy->getElementType()->isFunctionTy()) + return Function::Create(cast(PTy->getElementType()), + GlobalValue::ExternalLinkage, + A->getName(), M.get()); + // else + return new GlobalVariable(*M, PTy->getElementType(), false, + GlobalValue::ExternalLinkage, + nullptr, A->getName(), nullptr, + GlobalValue::NotThreadLocal, + PTy->getAddressSpace()); + } + // Else. + return nullptr; + }); // Create decls in the new module. for (auto *F : Part) @@ -316,7 +401,7 @@ private: // Move the function bodies. for (auto *F : Part) - moveFunctionBody(*F, VMap, &GDM); + moveFunctionBody(*F, VMap, &Materializer); // Create memory manager and symbol resolver. auto MemMgr = llvm::make_unique(); @@ -340,9 +425,11 @@ private: } BaseLayerT &BaseLayer; + PartitioningFtor Partition; CompileCallbackMgrT &CompileCallbackMgr; + IndirectStubsManagerBuilderT CreateIndirectStubsManager; + LogicalDylibList LogicalDylibs; - PartitioningFtor Partition; bool CloneStubsIntoPartitions; }; diff --git a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h index e5cd0d2559c..ec4befd2e3c 100644 --- a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h +++ b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h @@ -118,6 +118,9 @@ protected: typedef std::map TrampolineMapT; TrampolineMapT ActiveTrampolines; std::vector AvailableTrampolines; + +private: + virtual void anchor(); }; /// @brief Manage compile callbacks. 
@@ -222,6 +225,93 @@ private: TargetAddress ResolverBlockAddr; }; +/// @brief Base class for managing collections of named indirect stubs. +class IndirectStubsManagerBase { +public: + + /// @brief Map type for initializing the manager. See init. + typedef StringMap> StubInitsMap; + + virtual ~IndirectStubsManagerBase() {} + + /// @brief Create StubInits.size() stubs with the given names, target + /// addresses, and flags. + virtual std::error_code init(const StubInitsMap &StubInits) = 0; + + /// @brief Find the stub with the given name. If ExportedStubsOnly is true, + /// this will only return a result if the stub's flags indicate that it + /// is exported. + virtual JITSymbol findStub(StringRef Name, bool ExportedStubsOnly) = 0; + + /// @brief Find the implementation-pointer for the stub. + virtual JITSymbol findPointer(StringRef Name) = 0; + + /// @brief Change the value of the implementation pointer for the stub. + virtual std::error_code updatePointer(StringRef Name, TargetAddress NewAddr) = 0; +private: + virtual void anchor(); +}; + +/// @brief IndirectStubsManager implementation for a concrete target, e.g. OrcX86_64. +/// (See OrcTargetSupport.h). 
+template +class IndirectStubsManager : public IndirectStubsManagerBase { +public: + + std::error_code + init(const StubInitsMap &StubInits) override { + if (auto EC = TargetT::emitIndirectStubsBlock(IndirectStubsInfo, + StubInits.size(), + nullptr)) + return EC; + + unsigned I = 0; + for (auto &Entry : StubInits) { + *IndirectStubsInfo.getPtr(I) = + reinterpret_cast(static_cast(Entry.second.first)); + StubIndexes[Entry.first()] = std::make_pair(I++, Entry.second.second); + } + + return std::error_code(); + } + + JITSymbol findStub(StringRef Name, bool ExportedStubsOnly) override { + auto I = StubIndexes.find(Name); + if (I == StubIndexes.end()) + return nullptr; + void *StubAddr = IndirectStubsInfo.getStub(I->second.first); + assert(StubAddr && "Missing stub address"); + auto StubTargetAddr = + static_cast(reinterpret_cast(StubAddr)); + auto StubSymbol = JITSymbol(StubTargetAddr, I->second.second); + if (ExportedStubsOnly && !StubSymbol.isExported()) + return nullptr; + return StubSymbol; + } + + JITSymbol findPointer(StringRef Name) override { + auto I = StubIndexes.find(Name); + if (I == StubIndexes.end()) + return nullptr; + void *PtrAddr = IndirectStubsInfo.getPtr(StubIndexes[Name].first); + assert(PtrAddr && "Missing pointer address"); + auto PtrTargetAddr = + static_cast(reinterpret_cast(PtrAddr)); + return JITSymbol(PtrTargetAddr, JITSymbolFlags::None); + } + + std::error_code updatePointer(StringRef Name, TargetAddress NewAddr) override { + assert(StubIndexes.count(Name) && "No stub pointer for symbol"); + *IndirectStubsInfo.getPtr(StubIndexes[Name].first) = + reinterpret_cast(static_cast(NewAddr)); + return std::error_code(); + } + +private: + typename TargetT::IndirectStubsInfo IndirectStubsInfo; + StringMap> StubIndexes; +}; + /// @brief Build a function pointer of FunctionType with the given constant /// address. 
/// @@ -236,7 +326,7 @@ GlobalVariable* createImplPointer(PointerType &PT, Module &M, /// @brief Turn a function declaration into a stub function that makes an /// indirect call using the given function pointer. -void makeStub(Function &F, GlobalVariable &ImplPointer); +void makeStub(Function &F, Value &ImplPointer); /// @brief Raise linkage types and rename as necessary to ensure that all /// symbols are accessible for other modules. @@ -289,9 +379,9 @@ void moveGlobalVariableInitializer(GlobalVariable &OrigGV, ValueMaterializer *Materializer = nullptr, GlobalVariable *NewGV = nullptr); -GlobalAlias* cloneGlobalAlias(Module &Dst, const GlobalAlias &OrigA, - ValueToValueMapTy &VMap, - ValueMaterializer *Materializer = nullptr); +/// @brief Clone a global alias declaration into Dst and record it in VMap (the aliasee is left unset). +GlobalAlias* cloneGlobalAliasDecl(Module &Dst, const GlobalAlias &OrigA, + ValueToValueMapTy &VMap); } // End namespace orc. } // End namespace llvm. diff --git a/include/llvm/ExecutionEngine/Orc/LogicalDylib.h b/include/llvm/ExecutionEngine/Orc/LogicalDylib.h index 79e8a2e36dc..2580ab68243 100644 --- a/include/llvm/ExecutionEngine/Orc/LogicalDylib.h +++ b/include/llvm/ExecutionEngine/Orc/LogicalDylib.h @@ -82,22 +82,27 @@ public: } JITSymbol findSymbolInLogicalModule(LogicalModuleHandle LMH, - const std::string &Name) { + const std::string &Name, + bool ExportedSymbolsOnly) { + + if (auto StubSym = LMH->Resources.findSymbol(Name, ExportedSymbolsOnly)) + return StubSym; + for (auto BLH : LMH->BaseLayerHandles) - if (auto Symbol = BaseLayer.findSymbolIn(BLH, Name, false)) + if (auto Symbol = BaseLayer.findSymbolIn(BLH, Name, ExportedSymbolsOnly)) return Symbol; return nullptr; } JITSymbol findSymbolInternally(LogicalModuleHandle LMH, const std::string &Name) { - if (auto Symbol = findSymbolInLogicalModule(LMH, Name)) + if (auto Symbol = findSymbolInLogicalModule(LMH, Name, false)) return Symbol; for (auto LMI = LogicalModules.begin(), LME = LogicalModules.end(); LMI != LME; ++LMI) { if (LMI != LMH) - if (auto Symbol = 
findSymbolInLogicalModule(LMI, Name)) + if (auto Symbol = findSymbolInLogicalModule(LMI, Name, false)) return Symbol; } @@ -105,11 +110,10 @@ public: } JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) { - for (auto &LM : LogicalModules) - for (auto BLH : LM.BaseLayerHandles) - if (auto Symbol = - BaseLayer.findSymbolIn(BLH, Name, ExportedSymbolsOnly)) - return Symbol; + for (auto LMI = LogicalModules.begin(), LME = LogicalModules.end(); + LMI != LME; ++LMI) + if (auto Sym = findSymbolInLogicalModule(LMI, Name, ExportedSymbolsOnly)) + return Sym; return nullptr; } @@ -119,7 +123,6 @@ protected: BaseLayerT BaseLayer; LogicalModuleList LogicalModules; LogicalDylibResources DylibResources; - }; } // End namespace orc. diff --git a/include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h b/include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h index 309f5a96090..bf0e41e9f1f 100644 --- a/include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h +++ b/include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h @@ -9,12 +9,17 @@ // // Target specific code for Orc, e.g. callback assembly. // +// Target classes should be part of the JIT *target* process, not the host +// process (except where you're doing hosted JITing and the two are one and the +// same). +// //===----------------------------------------------------------------------===// #ifndef LLVM_EXECUTIONENGINE_ORC_ORCTARGETSUPPORT_H #define LLVM_EXECUTIONENGINE_ORC_ORCTARGETSUPPORT_H #include "IndirectionUtils.h" +#include "llvm/Support/Memory.h" namespace llvm { namespace orc { @@ -45,6 +50,46 @@ public: unsigned NumCalls, unsigned StartIndex = 0); + /// @brief Provide information about stub blocks generated by the + /// emitIndirectStubsBlock function. + class IndirectStubsInfo { + friend class OrcX86_64; + public: + const static unsigned StubSize = 8; + const static unsigned PtrSize = 8; + + IndirectStubsInfo() : NumStubs(0) {} + ~IndirectStubsInfo(); + + /// @brief Number of stubs in this block. 
+ unsigned getNumStubs() const { return NumStubs; } + + /// @brief Get a pointer to the stub at the given index, which must be in + /// the range 0 .. getNumStubs() - 1. + void* getStub(unsigned Idx) const { + return static_cast(StubsBlock.base()) + Idx; + } + + /// @brief Get a pointer to the implementation-pointer at the given index, + /// which must be in the range 0 .. getNumStubs() - 1. + void** getPtr(unsigned Idx) const { + return static_cast(PtrsBlock.base()) + Idx; + } + private: + unsigned NumStubs; + sys::MemoryBlock StubsBlock; + sys::MemoryBlock PtrsBlock; + }; + + /// @brief Emit at least MinStubs worth of indirect call stubs, rounded out to + /// the nearest page size. + /// + /// E.g. Asking for 4 stubs on x86-64, where stubs are 8-bytes, with 4k + /// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513 + /// will return a block of 1024 (2-pages worth). + static std::error_code emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, + unsigned MinStubs, + void *InitialPtrVal); }; } // End namespace orc. 
diff --git a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index 4d207cffd20..5b1eccd82a4 100644 --- a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -19,6 +19,9 @@ namespace llvm { namespace orc { +void JITCompileCallbackManagerBase::anchor() {} +void IndirectStubsManagerBase::anchor() {} + Constant* createIRTypedAddress(FunctionType &FT, TargetAddress Addr) { Constant *AddrIntVal = ConstantInt::get(Type::getInt64Ty(FT.getContext()), Addr); @@ -37,7 +40,7 @@ GlobalVariable* createImplPointer(PointerType &PT, Module &M, return IP; } -void makeStub(Function &F, GlobalVariable &ImplPointer) { +void makeStub(Function &F, Value &ImplPointer) { assert(F.isDeclaration() && "Can't turn a definition into a stub."); assert(F.getParent() && "Function isn't in a module."); Module &M = *F.getParent(); @@ -106,6 +109,9 @@ void makeAllSymbolsExternallyAccessible(Module &M) { for (auto &GV : M.globals()) raiseVisibilityOnValue(GV, Renamer); + + for (auto &A : M.aliases()) + raiseVisibilityOnValue(A, Renamer); } Function* cloneFunctionDecl(Module &Dst, const Function &F, @@ -177,17 +183,14 @@ void moveGlobalVariableInitializer(GlobalVariable &OrigGV, nullptr, Materializer)); } -GlobalAlias* cloneGlobalAlias(Module &Dst, const GlobalAlias &OrigA, - ValueToValueMapTy &VMap, - ValueMaterializer *Materializer) { +GlobalAlias* cloneGlobalAliasDecl(Module &Dst, const GlobalAlias &OrigA, + ValueToValueMapTy &VMap) { assert(OrigA.getAliasee() && "Original alias doesn't have an aliasee?"); auto *NewA = GlobalAlias::create(OrigA.getValueType(), OrigA.getType()->getPointerAddressSpace(), OrigA.getLinkage(), OrigA.getName(), &Dst); NewA->copyAttributesFrom(&OrigA); VMap[&OrigA] = NewA; - NewA->setAliasee(cast(MapValue(OrigA.getAliasee(), VMap, RF_None, - nullptr, Materializer))); return NewA; } diff --git a/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp 
b/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp index 258868aa64f..ef34c4a2b45 100644 --- a/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp +++ b/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp @@ -1,7 +1,9 @@ #include "llvm/ADT/Triple.h" #include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h" +#include "llvm/Support/Process.h" #include + using namespace llvm::orc; namespace { @@ -134,5 +136,81 @@ OrcX86_64::insertCompileCallbackTrampolines(Module &M, return GetLabelName; } +OrcX86_64::IndirectStubsInfo::~IndirectStubsInfo() { + sys::Memory::releaseMappedMemory(StubsBlock); + sys::Memory::releaseMappedMemory(PtrsBlock); +} + +std::error_code OrcX86_64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, + unsigned MinStubs, + void *InitialPtrVal) { + // Stub format is: + // + // .section __orc_stubs + // stub1: + // jmpq *ptr1(%rip) + // .byte 0xC4 ; <- Invalid opcode padding. + // .byte 0xF1 + // stub2: + // jmpq *ptr2(%rip) + // + // ... + // + // .section __orc_ptrs + // ptr1: + // .quad 0x0 + // ptr2: + // .quad 0x0 + // + // ... + + const unsigned StubSize = IndirectStubsInfo::StubSize; + + // Emit at least MinStubs, rounded up to fill the pages allocated. + unsigned PageSize = sys::Process::getPageSize(); + unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize; + unsigned NumStubs = (NumPages * PageSize) / StubSize; + + // Allocate memory for stubs and pointers in one call. + std::error_code EC; + auto InitialBlock = sys::Memory::allocateMappedMemory(2 * NumPages * PageSize, + nullptr, + sys::Memory::MF_READ | + sys::Memory::MF_WRITE, + EC); + + if (EC) + return EC; + + // Create separate MemoryBlocks representing the stubs and pointers. + sys::MemoryBlock StubsBlock(InitialBlock.base(), NumPages * PageSize); + sys::MemoryBlock PtrsBlock(static_cast(InitialBlock.base()) + + NumPages * PageSize, + NumPages * PageSize); + + // Populate the stubs page stubs and mark it executable. 
+ uint64_t *Stub = reinterpret_cast(StubsBlock.base()); + uint64_t PtrOffsetField = + static_cast(NumPages * PageSize - 6) << 16; + for (unsigned I = 0; I < NumStubs; ++I) + Stub[I] = 0xF1C40000000025ff | PtrOffsetField; + + if (auto EC = sys::Memory::protectMappedMemory(StubsBlock, + sys::Memory::MF_READ | + sys::Memory::MF_EXEC)) + return EC; + + // Initialize all pointers to InitialPtrVal. + void **Ptr = reinterpret_cast(PtrsBlock.base()); + for (unsigned I = 0; I < NumStubs; ++I) + Ptr[I] = InitialPtrVal; + + StubsInfo.NumStubs = NumStubs; + StubsInfo.StubsBlock = std::move(StubsBlock); + StubsInfo.PtrsBlock = std::move(PtrsBlock); + + return std::error_code(); +} + } // End namespace orc. } // End namespace llvm. diff --git a/tools/lli/OrcLazyJIT.cpp b/tools/lli/OrcLazyJIT.cpp index 4ac2ccffcd5..aec6e1a7297 100644 --- a/tools/lli/OrcLazyJIT.cpp +++ b/tools/lli/OrcLazyJIT.cpp @@ -38,11 +38,16 @@ namespace { "Dump modules to the current " "working directory. (WARNING: " "will overwrite existing files)."), - clEnumValEnd)); + clEnumValEnd), + cl::Hidden); + + cl::opt OrcInlineStubs("orc-lazy-inline-stubs", + cl::desc("Try to inline stubs"), + cl::init(true), cl::Hidden); } OrcLazyJIT::CallbackManagerBuilder -OrcLazyJIT::createCallbackManagerBuilder(Triple T) { +OrcLazyJIT::createCallbackMgrBuilder(Triple T) { switch (T.getArch()) { default: return nullptr; @@ -58,6 +63,18 @@ OrcLazyJIT::createCallbackManagerBuilder(Triple T) { } } +OrcLazyJIT::IndirectStubsManagerBuilder +OrcLazyJIT::createIndirectStubsMgrBuilder(Triple T) { + switch (T.getArch()) { + default: return nullptr; + + case Triple::x86_64: + return [](){ + return llvm::make_unique>(); + }; + } +} + OrcLazyJIT::TransformFtor OrcLazyJIT::createDebugDumper() { switch (OrcDumpKind) { @@ -111,6 +128,12 @@ OrcLazyJIT::TransformFtor OrcLazyJIT::createDebugDumper() { // Defined in lli.cpp. 
CodeGenOpt::Level getOptLevel(); + +template +static PtrTy fromTargetAddress(orc::TargetAddress Addr) { + return reinterpret_cast(static_cast(Addr)); +} + int llvm::runOrcLazyJIT(std::unique_ptr M, int ArgC, char* ArgV[]) { // Add the program's symbols into the JIT's search space. if (sys::DynamicLibrary::LoadLibraryPermanently(nullptr)) { @@ -123,10 +146,9 @@ int llvm::runOrcLazyJIT(std::unique_ptr M, int ArgC, char* ArgV[]) { EngineBuilder EB; EB.setOptLevel(getOptLevel()); auto TM = std::unique_ptr(EB.selectTarget()); - M->setDataLayout(TM->createDataLayout()); auto &Context = getGlobalContext(); auto CallbackMgrBuilder = - OrcLazyJIT::createCallbackManagerBuilder(Triple(TM->getTargetTriple())); + OrcLazyJIT::createCallbackMgrBuilder(Triple(TM->getTargetTriple())); // If we couldn't build the factory function then there must not be a callback // manager for this target. Bail out. @@ -136,9 +158,20 @@ int llvm::runOrcLazyJIT(std::unique_ptr M, int ArgC, char* ArgV[]) { return 1; } + auto IndirectStubsMgrBuilder = + OrcLazyJIT::createIndirectStubsMgrBuilder(Triple(TM->getTargetTriple())); + + // If we couldn't build a stubs-manager-builder for this target then bail out. + if (!IndirectStubsMgrBuilder) { + errs() << "No indirect stubs manager available for target '" + << TM->getTargetTriple().str() << "'.\n"; + return 1; + } + // Everything looks good. Build the JIT. - auto &DL = M->getDataLayout(); - OrcLazyJIT J(std::move(TM), DL, Context, CallbackMgrBuilder); + OrcLazyJIT J(std::move(TM), Context, CallbackMgrBuilder, + std::move(IndirectStubsMgrBuilder), + OrcInlineStubs); // Add the module, look up main and run it. 
auto MainHandle = J.addModule(std::move(M)); @@ -150,6 +183,6 @@ int llvm::runOrcLazyJIT(std::unique_ptr M, int ArgC, char* ArgV[]) { } typedef int (*MainFnPtr)(int, char*[]); - auto Main = OrcLazyJIT::fromTargetAddress(MainSym.getAddress()); + auto Main = fromTargetAddress(MainSym.getAddress()); return Main(ArgC, ArgV); } diff --git a/tools/lli/OrcLazyJIT.h b/tools/lli/OrcLazyJIT.h index ac1199dbde0..389380e8dd4 100644 --- a/tools/lli/OrcLazyJIT.h +++ b/tools/lli/OrcLazyJIT.h @@ -37,6 +37,8 @@ public: TransformFtor; typedef orc::IRTransformLayer IRDumpLayerT; typedef orc::CompileOnDemandLayer CODLayerT; + typedef CODLayerT::IndirectStubsManagerBuilderT + IndirectStubsManagerBuilder; typedef CODLayerT::ModuleSetHandleT ModuleHandleT; typedef std::function< @@ -45,16 +47,16 @@ public: LLVMContext&)> CallbackManagerBuilder; - static CallbackManagerBuilder createCallbackManagerBuilder(Triple T); - const DataLayout &DL; - - OrcLazyJIT(std::unique_ptr TM, const DataLayout &DL, - LLVMContext &Context, CallbackManagerBuilder &BuildCallbackMgr) - : DL(DL), TM(std::move(TM)), ObjectLayer(), + OrcLazyJIT(std::unique_ptr TM, LLVMContext &Context, + CallbackManagerBuilder &BuildCallbackMgr, + IndirectStubsManagerBuilder IndirectStubsMgrBuilder, + bool InlineStubs) + : TM(std::move(TM)), DL(this->TM->createDataLayout()), ObjectLayer(), CompileLayer(ObjectLayer, orc::SimpleCompiler(*this->TM)), IRDumpLayer(CompileLayer, createDebugDumper()), CCMgr(BuildCallbackMgr(IRDumpLayer, CCMgrMemMgr, Context)), - CODLayer(IRDumpLayer, *CCMgr, extractSingleFunction, false), + CODLayer(IRDumpLayer, extractSingleFunction, *CCMgr, + std::move(IndirectStubsMgrBuilder), InlineStubs), CXXRuntimeOverrides( [this](const std::string &S) { return mangle(S); }) {} @@ -66,10 +68,9 @@ public: DtorRunner.runViaLayer(CODLayer); } - template - static PtrTy fromTargetAddress(orc::TargetAddress Addr) { - return reinterpret_cast(static_cast(Addr)); - } + static CallbackManagerBuilder 
createCallbackMgrBuilder(Triple T); + + static IndirectStubsManagerBuilder createIndirectStubsMgrBuilder(Triple T); ModuleHandleT addModule(std::unique_ptr M) { // Attach a data-layout if one isn't already present. @@ -151,6 +152,7 @@ private: static TransformFtor createDebugDumper(); std::unique_ptr TM; + DataLayout DL; SectionMemoryManager CCMgrMemMgr; ObjLayerT ObjectLayer;