1 //===-- LTOModule.cpp - LLVM Link Time Optimizer --------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the Link Time Optimization library. This library is
11 // intended to be used by the linker to optimize code at link time.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/LTO/LTOModule.h"
16 #include "llvm/ADT/Triple.h"
17 #include "llvm/Bitcode/ReaderWriter.h"
18 #include "llvm/CodeGen/Analysis.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/DiagnosticPrinter.h"
21 #include "llvm/IR/LLVMContext.h"
22 #include "llvm/IR/Mangler.h"
23 #include "llvm/IR/Metadata.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/MC/MCExpr.h"
26 #include "llvm/MC/MCInst.h"
27 #include "llvm/MC/MCInstrInfo.h"
28 #include "llvm/MC/MCParser/MCAsmParser.h"
29 #include "llvm/MC/MCSection.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/MC/MCSymbol.h"
32 #include "llvm/MC/MCTargetAsmParser.h"
33 #include "llvm/MC/SubtargetFeature.h"
34 #include "llvm/Object/IRObjectFile.h"
35 #include "llvm/Object/ObjectFile.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/FileSystem.h"
38 #include "llvm/Support/Host.h"
39 #include "llvm/Support/MemoryBuffer.h"
40 #include "llvm/Support/Path.h"
41 #include "llvm/Support/SourceMgr.h"
42 #include "llvm/Support/TargetRegistry.h"
43 #include "llvm/Support/TargetSelect.h"
44 #include "llvm/Target/TargetLowering.h"
45 #include "llvm/Target/TargetLoweringObjectFile.h"
46 #include "llvm/Target/TargetRegisterInfo.h"
47 #include "llvm/Target/TargetSubtargetInfo.h"
48 #include "llvm/Transforms/Utils/GlobalStatus.h"
49 #include <system_error>
51 using namespace llvm::object;
/// Constructs an LTOModule that takes over the IR object file \p Obj (moved
/// into IRFile) and records the target machine pointer \p TM in _target.
53 LTOModule::LTOModule(std::unique_ptr<object::IRObjectFile> Obj,
54 llvm::TargetMachine *TM)
55 : IRFile(std::move(Obj)), _target(TM) {}
/// Constructs an LTOModule that, in addition to the IR object file \p Obj and
/// the target machine pointer \p TM, takes over \p Context (moved into
/// OwnedContext).
57 LTOModule::LTOModule(std::unique_ptr<object::IRObjectFile> Obj,
58 llvm::TargetMachine *TM,
59 std::unique_ptr<LLVMContext> Context)
60 : OwnedContext(std::move(Context)), IRFile(std::move(Obj)), _target(TM) {}
/// Destructor with an empty body; no explicit cleanup is performed here.
62 LTOModule::~LTOModule() {}
64 /// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM
/// bitcode.
///
/// \param Mem    Start of the memory range to inspect.
/// \param Length Number of bytes in the range.
66 bool LTOModule::isBitcodeFile(const void *Mem, size_t Length) {
// Wrap the raw range in a MemoryBufferRef named "<mem>" and ask IRObjectFile
// to locate bitcode inside it.
67 ErrorOr<MemoryBufferRef> BCData = IRObjectFile::findBitcodeInMemBuffer(
68 MemoryBufferRef(StringRef((const char *)Mem, Length), "<mem>"));
// NOTE(review): the statement that turns BCData into the boolean result is not
// visible in this view -- confirm it reports whether BCData holds a value.
/// Path-based variant of isBitcodeFile: loads the file at \p Path into a
/// MemoryBuffer and searches that buffer for bitcode.
72 bool LTOModule::isBitcodeFile(const char *Path) {
73 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
74 MemoryBuffer::getFile(Path);
// NOTE(review): BufferOrErr.get() below presupposes that the read succeeded;
// the failure check is not visible in this view -- confirm it precedes this.
78 ErrorOr<MemoryBufferRef> BCData = IRObjectFile::findBitcodeInMemBuffer(
79 BufferOrErr.get()->getMemBufferRef());
/// Checks whether the bitcode found in \p Buffer carries a target triple that
/// begins with \p TriplePrefix.
83 bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer,
84 StringRef TriplePrefix) {
// Locate the bitcode payload inside the buffer.
85 ErrorOr<MemoryBufferRef> BCOrErr =
86 IRObjectFile::findBitcodeInMemBuffer(Buffer->getMemBufferRef());
// NOTE(review): BCOrErr is dereferenced below and Context is used without a
// visible declaration; the error check and the declaration are presumably on
// lines not shown here -- confirm.
90 std::string Triple = getBitcodeTargetTriple(*BCOrErr, Context);
// Prefix match only, so TriplePrefix does not have to be a complete triple.
91 return StringRef(Triple).startswith(TriplePrefix);
/// Returns the producer string that getBitcodeProducerString extracts from the
/// bitcode found in \p Buffer.
94 std::string LTOModule::getProducerString(MemoryBuffer *Buffer) {
95 ErrorOr<MemoryBufferRef> BCOrErr =
96 IRObjectFile::findBitcodeInMemBuffer(Buffer->getMemBufferRef());
// NOTE(review): BCOrErr is dereferenced below and Context is used without a
// visible declaration; the error check and the declaration are presumably on
// lines not shown here -- confirm.
100 return getBitcodeProducerString(*BCOrErr, Context);
/// Creates an LTOModule from the file at \p path using the caller-supplied
/// \p Context.
///
/// \param Context LLVMContext whose address is forwarded to makeLTOModule.
/// \param path    File to load with MemoryBuffer::getFile.
/// \param options Target options forwarded to makeLTOModule.
/// \returns the result of makeLTOModule for the loaded buffer.
103 ErrorOr<std::unique_ptr<LTOModule>>
104 LTOModule::createFromFile(LLVMContext &Context, const char *path,
105 TargetOptions options) {
106 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
107 MemoryBuffer::getFile(path);
108 if (std::error_code EC = BufferOrErr.getError())
// NOTE(review): the statement guarded by the check above is not visible here;
// presumably it returns EC -- confirm.
110 std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrErr.get());
// Only a MemoryBufferRef is passed on; Buffer itself stays local to this
// function.
111 return makeLTOModule(Buffer->getMemBufferRef(), options, &Context);
/// Creates an LTOModule from an already-open file descriptor by delegating to
/// createFromOpenFileSlice, passing \p size as the map size and 0 as the
/// offset.
114 ErrorOr<std::unique_ptr<LTOModule>>
115 LTOModule::createFromOpenFile(LLVMContext &Context, int fd, const char *path,
116 size_t size, TargetOptions options) {
117 return createFromOpenFileSlice(Context, fd, path, size, 0, options);
/// Creates an LTOModule from a slice of an already-open file.
///
/// \param Context  LLVMContext whose address is forwarded to makeLTOModule.
/// \param fd       Open file descriptor handed to MemoryBuffer::getOpenFileSlice.
/// \param path     File name handed to MemoryBuffer::getOpenFileSlice.
/// \param map_size Size argument for the slice.
/// \param offset   Offset argument for the slice.
/// \param options  Target options forwarded to makeLTOModule.
120 ErrorOr<std::unique_ptr<LTOModule>>
121 LTOModule::createFromOpenFileSlice(LLVMContext &Context, int fd,
122 const char *path, size_t map_size,
123 off_t offset, TargetOptions options) {
124 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
125 MemoryBuffer::getOpenFileSlice(fd, path, map_size, offset);
126 if (std::error_code EC = BufferOrErr.getError())
// NOTE(review): the statement guarded by the check above is not visible here;
// presumably it returns EC -- confirm.
128 std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrErr.get());
// Only a MemoryBufferRef is passed on; Buffer itself stays local to this
// function.
129 return makeLTOModule(Buffer->getMemBufferRef(), options, &Context);
/// Creates an LTOModule from an in-memory range using the caller-supplied
/// \p Context; a thin wrapper that forwards to createInContext with &Context.
/// NOTE(review): `path`, used in the call below, is presumably the final
/// parameter, declared on a line not visible in this view -- confirm.
132 ErrorOr<std::unique_ptr<LTOModule>>
133 LTOModule::createFromBuffer(LLVMContext &Context, const void *mem,
134 size_t length, TargetOptions options,
136 return createInContext(mem, length, options, path, &Context);
/// Creates an LTOModule from an in-memory range without a caller-supplied
/// context: forwards to createInContext with a null LLVMContext pointer.
139 ErrorOr<std::unique_ptr<LTOModule>>
140 LTOModule::createInLocalContext(const void *mem, size_t length,
141 TargetOptions options, StringRef path) {
142 return createInContext(mem, length, options, path, nullptr);
/// Shared implementation behind createFromBuffer and createInLocalContext.
///
/// \param mem     Start of the memory range holding the input.
/// \param length  Number of bytes in the range.
/// \param options Target options forwarded to makeLTOModule.
/// \param path    Used as the identifier of the MemoryBufferRef.
/// \param Context Forwarded unchanged to makeLTOModule; createInLocalContext
///                passes nullptr here.
145 ErrorOr<std::unique_ptr<LTOModule>>
146 LTOModule::createInContext(const void *mem, size_t length,
147 TargetOptions options, StringRef path,
148 LLVMContext *Context) {
// View the raw range as a StringRef, then as a MemoryBufferRef named `path`.
149 StringRef Data((const char *)mem, length);
150 MemoryBufferRef Buffer(Data, path);
151 return makeLTOModule(Buffer, options, Context);
/// File-local helper: locates the bitcode inside \p Buffer and turns it into a
/// Module in \p Context, either by parsing the whole file or by setting up a
/// lazily loaded module.
/// NOTE(review): the rest of the parameter list and the condition that selects
/// between the two paths are not visible in this view; the call site in
/// makeLTOModule passes a third argument annotated "ShouldBeLazy" -- confirm.
154 static ErrorOr<std::unique_ptr<Module>>
155 parseBitcodeFileImpl(MemoryBufferRef Buffer, LLVMContext &Context,
159 ErrorOr<MemoryBufferRef> MBOrErr =
160 IRObjectFile::findBitcodeInMemBuffer(Buffer);
161 if (std::error_code EC = MBOrErr.getError())
// NOTE(review): the statement guarded by the check above is not visible here;
// presumably it returns EC -- confirm.
165 // Parse the full file.
166 ErrorOr<std::unique_ptr<Module>> M = parseBitcodeFile(*MBOrErr, Context);
167 if (std::error_code EC = M.getError())
169 return std::move(*M);
// Lazy path: wrap the located bitcode range in a MemoryBuffer and move that
// wrapper into getLazyBitcodeModule, asking for metadata to be loaded lazily.
173 std::unique_ptr<MemoryBuffer> LightweightBuf =
174 MemoryBuffer::getMemBuffer(*MBOrErr, false);
175 ErrorOr<std::unique_ptr<Module>> M =
176 getLazyBitcodeModule(std::move(LightweightBuf), Context, nullptr,
177 true /*ShouldLazyLoadMetadata*/);
178 if (std::error_code EC = M.getError())
180 return std::move(*M);
/// Builds an LTOModule from the bitcode in \p Buffer.
///
/// Steps visible here: parse the bitcode into a Module, pick a target from the
/// module's triple (or the host default triple when the module has none),
/// create a TargetMachine, give the module that machine's data layout, wrap
/// everything in an IRObjectFile and an LTOModule, then parse metadata.
///
/// \param Buffer  Reference to the memory holding the input.
/// \param options Target options (their use is on a line not visible here).
/// \param Context Context to parse into; may be replaced by a locally created
///                one (see the note at the top of the body).
183 ErrorOr<std::unique_ptr<LTOModule>>
184 LTOModule::makeLTOModule(MemoryBufferRef Buffer, TargetOptions options,
185 LLVMContext *Context) {
// NOTE(review): the guard around the two assignments below is not visible;
// presumably a fresh context is created only when the caller passed none.
186 std::unique_ptr<LLVMContext> OwnedContext;
188 OwnedContext = llvm::make_unique<LLVMContext>();
189 Context = OwnedContext.get();
192 // If we own a context, we know this is being used only for symbol
193 // extraction, not linking. Be lazy in that case.
194 ErrorOr<std::unique_ptr<Module>> MOrErr =
195 parseBitcodeFileImpl(Buffer, *Context,
196 /* ShouldBeLazy */ static_cast<bool>(OwnedContext));
197 if (std::error_code EC = MOrErr.getError())
199 std::unique_ptr<Module> &M = *MOrErr;
// Fall back to the host's default triple when the module does not name one.
201 std::string TripleStr = M->getTargetTriple();
202 if (TripleStr.empty())
203 TripleStr = sys::getDefaultTargetTriple();
204 llvm::Triple Triple(TripleStr);
206 // find machine architecture for this module
208 const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg);
// NOTE(review): the return below yields a null LTOModule pointer rather than
// an error code; its guarding condition is not visible -- presumably it is
// taken when the lookup found no target.
210 return std::unique_ptr<LTOModule>(nullptr);
212 // construct LTOModule, hand over ownership of module and target
213 SubtargetFeatures Features;
214 Features.getDefaultSubtargetFeatures(Triple);
215 std::string FeatureStr = Features.getString();
216 // Set a default CPU for Darwin triples.
// NOTE(review): the CPU declaration and the values assigned in each branch are
// on lines not visible in this view.
218 if (Triple.isOSDarwin()) {
219 if (Triple.getArch() == llvm::Triple::x86_64)
221 else if (Triple.getArch() == llvm::Triple::x86)
223 else if (Triple.getArch() == llvm::Triple::aarch64)
// The module adopts the data layout of the target machine created here.
227 TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr,
229 M->setDataLayout(target->createDataLayout());
// The IRObjectFile takes over the Module (moved) and keeps the buffer reference.
231 std::unique_ptr<object::IRObjectFile> IRObj(
232 new object::IRObjectFile(Buffer, std::move(M)));
// Two construction forms follow: one also moves OwnedContext into the new
// LTOModule, the other does not. NOTE(review): the selecting condition is not
// visible -- presumably it tests whether OwnedContext is set.
234 std::unique_ptr<LTOModule> Ret;
236 Ret.reset(new LTOModule(std::move(IRObj), target, std::move(OwnedContext)));
238 Ret.reset(new LTOModule(std::move(IRObj), target));
241 Ret->parseMetadata();
243 return std::move(Ret);
246 /// Create a MemoryBuffer from a memory range with an optional name.
///
/// \param mem    Start of the memory range.
/// \param length Number of bytes in the range.
/// \param name   Identifier given to the resulting buffer.
247 std::unique_ptr<MemoryBuffer>
248 LTOModule::makeBuffer(const void *mem, size_t length, StringRef name) {
249 const char *startPtr = (const char*)mem;
// NOTE(review): the trailing `false` is presumably the RequiresNullTerminator
// flag of MemoryBuffer::getMemBuffer -- confirm against the MemoryBuffer API.
250 return MemoryBuffer::getMemBuffer(StringRef(startPtr, length), name, false);
253 /// objcClassNameFromExpression - Get string that the data pointer points to.
///
/// When \p c has the expected shape, \p name is set to ".objc_class_name_"
/// followed by the C string found in the referenced global's initializer.
/// NOTE(review): the return type and the return statements are on lines not
/// visible here; callers in this file test the result in an `if`, so it
/// presumably reports whether a name was produced -- confirm.
255 LTOModule::objcClassNameFromExpression(const Constant *c, std::string &name) {
// Expected shape: constant expression -> operand 0 is a global variable ->
// its initializer is a constant data array holding a C string.
256 if (const ConstantExpr *ce = dyn_cast<ConstantExpr>(c)) {
257 Constant *op = ce->getOperand(0);
258 if (GlobalVariable *gvn = dyn_cast<GlobalVariable>(op)) {
259 Constant *cn = gvn->getInitializer();
260 if (ConstantDataArray *ca = dyn_cast<ConstantDataArray>(cn)) {
261 if (ca->isCString()) {
262 name = (".objc_class_name_" + ca->getAsCString()).str();
271 /// addObjCClass - Parse i386/ppc ObjC class data structure.
///
/// From the initializer of \p clgv this records the superclass name (operand 1)
/// as an undefined symbol and the class name (operand 2) as a defined data
/// symbol.
272 void LTOModule::addObjCClass(const GlobalVariable *clgv) {
// dyn_cast yields null when the initializer is not a ConstantStruct.
// NOTE(review): the null check on `c` is not visible in this view -- confirm.
273 const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
276 // second slot in __OBJC,__class is pointer to superclass name
277 std::string superclassName;
278 if (objcClassNameFromExpression(c->getOperand(1), superclassName)) {
280 _undefines.insert(std::make_pair(superclassName, NameAndAttributes()));
// Fill in the entry only when the name was newly inserted into _undefines.
281 if (IterBool.second) {
282 NameAndAttributes &info = IterBool.first->second;
// The name pointer refers to the key stored in the map, not to the local
// std::string.
283 info.name = IterBool.first->first().data();
284 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
285 info.isFunction = false;
290 // third slot in __OBJC,__class is pointer to class name
291 std::string className;
292 if (objcClassNameFromExpression(c->getOperand(2), className)) {
293 auto Iter = _defines.insert(className).first;
295 NameAndAttributes info;
// The name pointer refers to the key stored in _defines, not to className.
296 info.name = Iter->first().data();
297 info.attributes = LTO_SYMBOL_PERMISSIONS_DATA |
298 LTO_SYMBOL_DEFINITION_REGULAR | LTO_SYMBOL_SCOPE_DEFAULT;
299 info.isFunction = false;
301 _symbols.push_back(info);
305 /// addObjCCategory - Parse i386/ppc ObjC category data structure.
///
/// Records the category's target class name (operand 1 of the initializer of
/// \p clgv) as an undefined symbol if it is not already in _undefines.
306 void LTOModule::addObjCCategory(const GlobalVariable *clgv) {
// dyn_cast yields null when the initializer is not a ConstantStruct.
// NOTE(review): the null check on `c` is not visible in this view -- confirm.
307 const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
310 // second slot in __OBJC,__category is pointer to target class name
311 std::string targetclassName;
312 if (!objcClassNameFromExpression(c->getOperand(1), targetclassName))
316 _undefines.insert(std::make_pair(targetclassName, NameAndAttributes()));
// IterBool.second is false when the name was already present in _undefines.
318 if (!IterBool.second)
321 NameAndAttributes &info = IterBool.first->second;
// The name pointer refers to the key stored in the map, not to the local
// std::string.
322 info.name = IterBool.first->first().data();
323 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
324 info.isFunction = false;
328 /// addObjCClassRef - Parse i386/ppc ObjC class list data structure.
///
/// Derives a class name directly from the initializer of \p clgv and records
/// it as an undefined symbol if it is not already in _undefines.
329 void LTOModule::addObjCClassRef(const GlobalVariable *clgv) {
330 std::string targetclassName;
331 if (!objcClassNameFromExpression(clgv->getInitializer(), targetclassName))
335 _undefines.insert(std::make_pair(targetclassName, NameAndAttributes()));
// IterBool.second is false when the name was already present in _undefines.
337 if (!IterBool.second)
340 NameAndAttributes &info = IterBool.first->second;
// The name pointer refers to the key stored in the map, not to the local
// std::string.
341 info.name = IterBool.first->first().data();
342 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
343 info.isFunction = false;
/// Symbol-reference overload: resolves \p Sym to its GlobalValue through IRFile
/// and forwards to the (name, value) overload with the contents of Buffer as
/// the name.
347 void LTOModule::addDefinedDataSymbol(const object::BasicSymbolRef &Sym) {
// NOTE(review): the code that writes the symbol's name into Buffer through OS
// is not visible here -- confirm Buffer holds the name before it is passed on.
348 SmallString<64> Buffer;
350 raw_svector_ostream OS(Buffer);
354 const GlobalValue *V = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
355 addDefinedDataSymbol(Buffer.c_str(), V);
/// Records \p v as a defined non-function symbol called \p Name, then inspects
/// the section name of \p v for the three legacy "__OBJC," section prefixes
/// handled below.
358 void LTOModule::addDefinedDataSymbol(const char *Name, const GlobalValue *v) {
359 // Add to list of defined symbols.
360 addDefinedSymbol(Name, v, false);
// NOTE(review): the statement guarded by the check below is not visible;
// presumably values without an explicit section skip the ObjC handling.
362 if (!v->hasSection() /* || !isTargetDarwin */)
365 // Special case i386/ppc ObjC data structures in magic sections:
366 // The issue is that the old ObjC object format did some strange
367 // contortions to avoid real linker symbols. For instance, the
368 // ObjC class data structure is allocated statically in the executable
369 // that defines that class. That data structure contains a pointer to
370 // its superclass. But instead of just initializing that part of the
371 // struct to the address of its superclass, and letting the static and
372 // dynamic linkers do the rest, the runtime works by having that field
373 // instead point to a C-string that is the name of the superclass.
374 // At runtime the objc initialization updates that pointer and sets
375 // it to point to the actual super class. As far as the linker
376 // knows it is just a pointer to a string. But then someone wanted the
377 // linker to issue errors at build time if the superclass was not found.
378 // So they figured out a way in mach-o object format to use absolute
379 // symbols (.objc_class_name_Foo = 0) and a floating reference
380 // (.reference .objc_class_name_Bar) to cause the linker into erroring when
381 // a class was missing.
382 // The following synthesizes the implicit .objc_* symbols for the linker
383 // from the ObjC data structures generated by the front end.
// Each compare(0, N, prefix) == 0 below is a starts-with test; N equals the
// length of the prefix literal (15, 18 and 18 characters respectively).
// NOTE(review): the calls made inside each dyn_cast branch are not visible in
// this view; presumably they are addObjCClass, addObjCCategory and
// addObjCClassRef -- confirm.
385 // special case if this data blob is an ObjC class definition
386 std::string Section = v->getSection();
387 if (Section.compare(0, 15, "__OBJC,__class,") == 0) {
388 if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
393 // special case if this data blob is an ObjC category definition
394 else if (Section.compare(0, 18, "__OBJC,__category,") == 0) {
395 if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
400 // special case if this data blob is the list of referenced classes
401 else if (Section.compare(0, 18, "__OBJC,__cls_refs,") == 0) {
402 if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
/// Symbol-reference overload: resolves \p Sym to its Function through IRFile
/// and forwards to the (name, function) overload with the contents of Buffer
/// as the name.
408 void LTOModule::addDefinedFunctionSymbol(const object::BasicSymbolRef &Sym) {
// NOTE(review): the code that writes the symbol's name into Buffer through OS
// is not visible here -- confirm Buffer holds the name before it is passed on.
409 SmallString<64> Buffer;
411 raw_svector_ostream OS(Buffer);
// cast<> (not dyn_cast<>) is used: the symbol's GlobalValue is expected to be
// a Function.
416 cast<Function>(IRFile->getSymbolGV(Sym.getRawDataRefImpl()));
417 addDefinedFunctionSymbol(Buffer.c_str(), F);
/// Records \p F as a defined function symbol called \p Name by forwarding to
/// addDefinedSymbol with the function flag set to true.
420 void LTOModule::addDefinedFunctionSymbol(const char *Name, const Function *F) {
421 // add to list of defined symbols
422 addDefinedSymbol(Name, F, true);
/// Computes the LTO attribute word for the definition \p def (alignment,
/// permissions, definition kind, scope, comdat and alias bits), interns
/// \p Name in _defines and appends the resulting record to _symbols.
/// NOTE(review): the last parameter (used below as `isFunction`) is declared on
/// a line not visible in this view.
425 void LTOModule::addDefinedSymbol(const char *Name, const GlobalValue *def,
427 // set alignment part log2() can have rounding errors
// The attribute word starts out as the trailing-zero count of the alignment,
// or 0 when no alignment is set (assumes align is a power of two, so the count
// equals log2 -- TODO confirm).
428 uint32_t align = def->getAlignment();
429 uint32_t attr = align ? countTrailingZeros(align) : 0;
431 // set permissions part
// NOTE(review): the branch structure choosing between CODE, RODATA and DATA is
// only partly visible; presumably CODE is used for functions and the
// GlobalVariable check applies otherwise -- confirm.
433 attr |= LTO_SYMBOL_PERMISSIONS_CODE;
435 const GlobalVariable *gv = dyn_cast<GlobalVariable>(def);
436 if (gv && gv->isConstant())
437 attr |= LTO_SYMBOL_PERMISSIONS_RODATA;
439 attr |= LTO_SYMBOL_PERMISSIONS_DATA;
442 // set definition part
443 if (def->hasWeakLinkage() || def->hasLinkOnceLinkage())
444 attr |= LTO_SYMBOL_DEFINITION_WEAK;
445 else if (def->hasCommonLinkage())
446 attr |= LTO_SYMBOL_DEFINITION_TENTATIVE;
448 attr |= LTO_SYMBOL_DEFINITION_REGULAR;
// Scope part: local linkage is tested first, then hidden and protected
// visibility, then canBeOmittedFromSymbolTable.
451 if (def->hasLocalLinkage())
452 // Ignore visibility if linkage is local.
453 attr |= LTO_SYMBOL_SCOPE_INTERNAL;
454 else if (def->hasHiddenVisibility())
455 attr |= LTO_SYMBOL_SCOPE_HIDDEN;
456 else if (def->hasProtectedVisibility())
457 attr |= LTO_SYMBOL_SCOPE_PROTECTED;
458 else if (canBeOmittedFromSymbolTable(def))
459 attr |= LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN;
461 attr |= LTO_SYMBOL_SCOPE_DEFAULT;
463 if (def->hasComdat())
464 attr |= LTO_SYMBOL_COMDAT;
466 if (isa<GlobalAlias>(def))
467 attr |= LTO_SYMBOL_ALIAS;
469 auto Iter = _defines.insert(Name).first;
471 // fill information structure
472 NameAndAttributes info;
// info.name refers to the key characters stored in _defines; the assert checks
// that a NUL terminator sits at index NameRef.size().
473 StringRef NameRef = Iter->first();
474 info.name = NameRef.data();
475 assert(info.name[NameRef.size()] == '\0');
476 info.attributes = attr;
477 info.isFunction = isFunction;
480 // add to table of symbols
481 _symbols.push_back(info);
484 /// addAsmGlobalSymbol - Add a global symbol from module-level ASM to the
/// defined list.
///
/// \param name  Symbol name; interned in _defines.
/// \param scope Scope bits applied to the recorded symbol.
486 void LTOModule::addAsmGlobalSymbol(const char *name,
487 lto_symbol_attributes scope) {
488 auto IterBool = _defines.insert(name);
490 // only add new define if not already defined
491 if (!IterBool.second)
// Look up the _undefines entry for this name; operator[] on the map creates a
// default entry when none exists.
494 NameAndAttributes &info = _undefines[IterBool.first->first().data()];
// A null info.symbol means no IR global is attached to this entry.
496 if (info.symbol == nullptr) {
497 // FIXME: This is trying to take care of module ASM like this:
499 // module asm ".zerofill __FOO, __foo, _bar_baz_qux, 0"
501 // but is gross and its mother dresses it funny. Have the ASM parser give us
502 // more details for this type of situation so that we're not guessing so
505 // fill information structure
506 info.name = IterBool.first->first().data();
508 LTO_SYMBOL_PERMISSIONS_DATA | LTO_SYMBOL_DEFINITION_REGULAR | scope;
509 info.isFunction = false;
510 info.symbol = nullptr;
512 // add to table of symbols
513 _symbols.push_back(info);
// Otherwise an IR global is attached: it is re-added as a defined function or
// data symbol. NOTE(review): the condition choosing between the two calls is
// not visible -- presumably info.isFunction.
518 addDefinedFunctionSymbol(info.name, cast<Function>(info.symbol));
520 addDefinedDataSymbol(info.name, info.symbol);
// Replace the scope bits of the most recently appended symbol with `scope`.
522 _symbols.back().attributes &= ~LTO_SYMBOL_SCOPE_MASK;
523 _symbols.back().attributes |= scope;
526 /// addAsmGlobalSymbolUndef - Add a global symbol from module-level ASM to the
/// undefined list.
///
/// The name is appended to _asm_undefines on every call; the _undefines entry
/// is filled in only when the name was not already present.
528 void LTOModule::addAsmGlobalSymbolUndef(const char *name) {
529 auto IterBool = _undefines.insert(std::make_pair(name, NameAndAttributes()));
// The stored pointer refers to the key held by _undefines, not to `name`.
531 _asm_undefines.push_back(IterBool.first->first().data());
533 // we already have the symbol
534 if (!IterBool.second)
537 uint32_t attr = LTO_SYMBOL_DEFINITION_UNDEFINED;
538 attr |= LTO_SYMBOL_SCOPE_DEFAULT;
539 NameAndAttributes &info = IterBool.first->second;
540 info.name = IterBool.first->first().data();
541 info.attributes = attr;
542 info.isFunction = false;
543 info.symbol = nullptr;
546 /// Add a symbol which isn't defined just yet to a list to be resolved later.
///
/// The symbol is entered into _undefines under `name`; an existing entry is
/// left untouched.
/// NOTE(review): the second parameter (used below as `isFunc`) is declared on a
/// line not visible in this view.
547 void LTOModule::addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym,
549 SmallString<64> name;
// NOTE(review): the code that writes the symbol's name into `name` through OS
// is not visible here -- confirm it runs before the insert below.
551 raw_svector_ostream OS(name);
555 auto IterBool = _undefines.insert(std::make_pair(name, NameAndAttributes()));
557 // we already have the symbol
558 if (!IterBool.second)
561 NameAndAttributes &info = IterBool.first->second;
// The name pointer refers to the key stored in _undefines, not to the local
// SmallString.
563 info.name = IterBool.first->first().data();
565 const GlobalValue *decl = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
// Declarations with external-weak linkage are marked WEAKUNDEF; the other
// assignment marks the symbol UNDEFINED.
567 if (decl->hasExternalWeakLinkage())
568 info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF;
570 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
572 info.isFunction = isFunc;
/// Walks every symbol of IRFile and routes it to one of the add* helpers, then
/// appends to _symbols each _undefines entry that has no matching entry in
/// _defines.
/// NOTE(review): several branch conditions inside the loop are on lines not
/// visible in this view; the notes below mark where the structure is inferred.
576 void LTOModule::parseSymbols() {
577 for (auto &Sym : IRFile->symbols()) {
578 const GlobalValue *GV = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
579 uint32_t Flags = Sym.getFlags();
// NOTE(review): the statement guarded by the check below is not visible;
// presumably format-specific symbols are skipped.
580 if (Flags & object::BasicSymbolRef::SF_FormatSpecific)
583 bool IsUndefined = Flags & object::BasicSymbolRef::SF_Undefined;
// The next group handles symbols by name only, through the addAsmGlobalSymbol*
// helpers. NOTE(review): presumably this path is taken when GV is null --
// confirm.
586 SmallString<64> Buffer;
588 raw_svector_ostream OS(Buffer);
591 const char *Name = Buffer.c_str();
594 addAsmGlobalSymbolUndef(Name);
595 else if (Flags & object::BasicSymbolRef::SF_Global)
596 addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_DEFAULT);
598 addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_INTERNAL);
// Symbols backed by an IR global: F is non-null only when GV is a Function.
602 auto *F = dyn_cast<Function>(GV);
604 addPotentialUndefinedSymbol(Sym, F != nullptr);
609 addDefinedFunctionSymbol(Sym);
613 if (isa<GlobalVariable>(GV)) {
614 addDefinedDataSymbol(Sym);
// Anything left is asserted to be a GlobalAlias and recorded as a data symbol.
618 assert(isa<GlobalAlias>(GV));
619 addDefinedDataSymbol(Sym);
622 // make symbols for all undefines
623 for (StringMap<NameAndAttributes>::iterator u =_undefines.begin(),
624 e = _undefines.end(); u != e; ++u) {
625 // If this symbol also has a definition, then don't make an undefine because
626 // it is a tentative definition.
627 if (_defines.count(u->getKey())) continue;
628 NameAndAttributes info = u->getValue();
629 _symbols.push_back(info);
633 /// parseMetadata - Parse metadata from the module
///
/// Everything is written through a raw_string_ostream bound to LinkerOpts:
/// first each string of the "Linker Options" module flag, then per-symbol
/// linker flags obtained from the target's object-file lowering.
634 void LTOModule::parseMetadata() {
635 raw_string_ostream OS(LinkerOpts);
// The "Linker Options" flag is read as a node of nodes of strings; every
// string is appended preceded by a single space.
638 if (Metadata *Val = getModule().getModuleFlag("Linker Options")) {
639 MDNode *LinkerOptions = cast<MDNode>(Val);
640 for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) {
641 MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i));
642 for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) {
643 MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii));
644 OS << " " << MDOption->getString();
// NOTE(review): any filtering applied to Sym before the call below, and the
// call's remaining arguments, are on lines not visible in this view.
651 for (const NameAndAttributes &Sym : _symbols) {
654 _target->getObjFileLowering()->emitLinkerFlagsForGlobal(OS, Sym.symbol,
658 // Add other interesting metadata here.