1 //===-- LTOModule.cpp - LLVM Link Time Optimizer --------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the Link Time Optimization library. This library is
11 // intended to be used by the linker to optimize code at link time.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/LTO/LTOModule.h"
16 #include "llvm/ADT/Triple.h"
17 #include "llvm/Bitcode/ReaderWriter.h"
18 #include "llvm/CodeGen/Analysis.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/LLVMContext.h"
21 #include "llvm/IR/Metadata.h"
22 #include "llvm/IR/Module.h"
23 #include "llvm/MC/MCExpr.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
26 #include "llvm/MC/MCParser/MCAsmParser.h"
27 #include "llvm/MC/MCSection.h"
28 #include "llvm/MC/MCSubtargetInfo.h"
29 #include "llvm/MC/MCSymbol.h"
30 #include "llvm/MC/MCTargetAsmParser.h"
31 #include "llvm/MC/SubtargetFeature.h"
32 #include "llvm/Support/CommandLine.h"
33 #include "llvm/Support/FileSystem.h"
34 #include "llvm/Support/Host.h"
35 #include "llvm/Support/MemoryBuffer.h"
36 #include "llvm/Support/Path.h"
37 #include "llvm/Support/SourceMgr.h"
38 #include "llvm/Support/TargetRegistry.h"
39 #include "llvm/Support/TargetSelect.h"
40 #include "llvm/Target/TargetLowering.h"
41 #include "llvm/Target/TargetLoweringObjectFile.h"
42 #include "llvm/Target/TargetRegisterInfo.h"
43 #include "llvm/Target/TargetSubtargetInfo.h"
44 #include "llvm/Transforms/Utils/GlobalStatus.h"
45 #include <system_error>
/// Construct an LTOModule around an already-built IR object file.
/// \param Obj IR object file; moved into the IRFile member.
/// \param TM  target machine pointer; stored as-is in the _target member.
48 LTOModule::LTOModule(std::unique_ptr<object::IRObjectFile> Obj,
49 llvm::TargetMachine *TM)
50 : IRFile(std::move(Obj)), _target(TM) {}
52 /// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM
/// bitcode.
///
/// This overload inspects the \p length bytes starting at \p mem and compares
/// the kind reported by sys::fs::identify_magic against file_magic::bitcode.
54 bool LTOModule::isBitcodeFile(const void *mem, size_t length) {
55 return sys::fs::identify_magic(StringRef((const char *)mem, length)) ==
56 sys::fs::file_magic::bitcode;
/// isBitcodeFile - Path overload. Asks sys::fs::identify_magic to classify the
/// file at \p path and compares the reported kind against file_magic::bitcode.
59 bool LTOModule::isBitcodeFile(const char *path) {
60 sys::fs::file_magic type;
// identify_magic reports the detected kind through `type`; its return value
// (presumably an error code -- confirm against the LLVM FileSystem API) is
// checked before `type` is compared.
61 if (sys::fs::identify_magic(path, type))
63 return type == sys::fs::file_magic::bitcode;
/// isBitcodeForTarget - Returns whether the target triple read from \p buffer
/// begins with \p triplePrefix. The triple string is obtained from
/// getBitcodeTargetTriple using the global LLVMContext.
66 bool LTOModule::isBitcodeForTarget(MemoryBuffer *buffer,
67 StringRef triplePrefix) {
68 std::string Triple = getBitcodeTargetTriple(buffer, getGlobalContext());
69 return StringRef(Triple).startswith(triplePrefix);
/// createFromFile - Build an LTOModule from the file at \p path.
/// \param path    file handed to MemoryBuffer::getFile.
/// \param options forwarded unchanged to makeLTOModule.
/// \param errMsg  receives the error_code message when getFile reports an
///                error; also passed on to makeLTOModule.
/// \returns the result of makeLTOModule when the buffer was obtained.
72 LTOModule *LTOModule::createFromFile(const char *path, TargetOptions options,
73 std::string &errMsg) {
74 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
75 MemoryBuffer::getFile(path);
76 if (std::error_code EC = BufferOrErr.getError()) {
77 errMsg = EC.message();
// NOTE(review): Buffer is a function-local owner, so the memory it holds is
// released when this function returns. Confirm that nothing reachable from
// the returned LTOModule still refers to that memory.
80 std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrErr.get());
81 return makeLTOModule(Buffer->getMemBufferRef(), options, errMsg);
/// createFromOpenFile - Build an LTOModule from an already-open descriptor.
/// Thin wrapper over createFromOpenFileSlice: \p size is passed as the map
/// size and 0 as the offset; all other arguments are forwarded unchanged.
84 LTOModule *LTOModule::createFromOpenFile(int fd, const char *path, size_t size,
85 TargetOptions options,
86 std::string &errMsg) {
87 return createFromOpenFileSlice(fd, path, size, 0, options, errMsg);
/// createFromOpenFileSlice - Build an LTOModule from a slice of an open file.
/// \param fd       descriptor handed to MemoryBuffer::getOpenFileSlice.
/// \param path     name passed along with the descriptor.
/// \param map_size size argument given to getOpenFileSlice.
/// \param offset   offset argument given to getOpenFileSlice.
/// \param options  forwarded unchanged to makeLTOModule.
/// \param errMsg   receives the error_code message when getOpenFileSlice
///                 reports an error; also passed on to makeLTOModule.
90 LTOModule *LTOModule::createFromOpenFileSlice(int fd, const char *path,
91 size_t map_size, off_t offset,
92 TargetOptions options,
93 std::string &errMsg) {
94 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
95 MemoryBuffer::getOpenFileSlice(fd, path, map_size, offset);
96 if (std::error_code EC = BufferOrErr.getError()) {
97 errMsg = EC.message();
// NOTE(review): as in createFromFile, Buffer is function-local and its memory
// is released on return -- confirm the returned LTOModule does not refer to it.
100 std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrErr.get());
101 return makeLTOModule(Buffer->getMemBufferRef(), options, errMsg);
/// createFromBuffer - Build an LTOModule from bytes already in memory.
/// The \p length bytes at \p mem are viewed through a StringRef, paired with
/// \p path as the buffer identifier in a MemoryBufferRef, and handed to
/// makeLTOModule together with \p options and \p errMsg.
104 LTOModule *LTOModule::createFromBuffer(const void *mem, size_t length,
105 TargetOptions options,
106 std::string &errMsg, StringRef path) {
107 StringRef Data((char *)mem, length);
108 MemoryBufferRef Buffer(Data, path);
109 return makeLTOModule(Buffer, options, errMsg);
/// makeLTOModule - Shared back end of createFromFile, createFromOpenFileSlice
/// and createFromBuffer.
///
/// Steps visible in this function:
///  - wrap the bytes of \p Buffer in a MemoryBuffer via makeBuffer and load a
///    lazy bitcode module from it in the global LLVMContext;
///  - pick the target triple (the module's own, or the host default when the
///    module has none) and look up the matching Target;
///  - create a TargetMachine from the triple, a CPU string and the default
///    subtarget feature string;
///  - materialize the module, give it the target's data layout, and wrap it
///    in an IRObjectFile that is moved into a new LTOModule;
///  - call parseSymbols and parseMetadata on the new LTOModule.
///
/// \param errMsg receives the bitcode reader's error message; it is also
///               passed to TargetRegistry::lookupTarget and to parseSymbols.
112 LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer,
113 TargetOptions options,
114 std::string &errMsg) {
115 StringRef Data = Buffer.getBuffer();
116 StringRef FileName = Buffer.getBufferIdentifier();
117 std::unique_ptr<MemoryBuffer> MemBuf(
118 makeBuffer(Data.begin(), Data.size(), FileName));
// The reader is given the raw MemBuf pointer, not the unique_ptr.
// NOTE(review): confirm who owns MemBuf after a successful load.
122 ErrorOr<Module *> MOrErr =
123 getLazyBitcodeModule(MemBuf.get(), getGlobalContext());
124 if (std::error_code EC = MOrErr.getError()) {
125 errMsg = EC.message();
128 std::unique_ptr<Module> M(MOrErr.get());
// A module without a target triple falls back to the host default triple.
130 std::string TripleStr = M->getTargetTriple();
131 if (TripleStr.empty())
132 TripleStr = sys::getDefaultTargetTriple();
133 llvm::Triple Triple(TripleStr);
135 // find machine architecture for this module
136 const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg);
140 // construct LTOModule, hand over ownership of module and target
141 SubtargetFeatures Features;
142 Features.getDefaultSubtargetFeatures(Triple);
143 std::string FeatureStr = Features.getString();
144 // Set a default CPU for Darwin triples.
// The choice is keyed on the architecture: x86_64, x86 or aarch64.
146 if (Triple.isOSDarwin()) {
147 if (Triple.getArch() == llvm::Triple::x86_64)
149 else if (Triple.getArch() == llvm::Triple::x86)
151 else if (Triple.getArch() == llvm::Triple::aarch64)
155 TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr,
// The module was loaded lazily above, so materialize it before use, then
// adopt the data layout of the selected target's subtarget.
157 M->materializeAllPermanently(true);
158 M->setDataLayout(target->getSubtargetImpl()->getDataLayout());
// The IRObjectFile takes the module by move and keeps the caller's
// MemoryBufferRef (Buffer), not MemBuf.
160 std::unique_ptr<object::IRObjectFile> IRObj(
161 new object::IRObjectFile(Buffer, std::move(M)));
163 LTOModule *Ret = new LTOModule(std::move(IRObj), target);
// A truthy result from parseSymbols presumably signals failure -- confirm
// against parseSymbols' return statements.
165 if (Ret->parseSymbols(errMsg)) {
170 Ret->parseMetadata();
175 /// Create a MemoryBuffer from a memory range with an optional name.
/// \param mem    start of the range.
/// \param length number of bytes in the range.
/// \param name   buffer name passed through to MemoryBuffer::getMemBuffer.
/// \returns a unique_ptr owning whatever getMemBuffer returned.
176 std::unique_ptr<MemoryBuffer>
177 LTOModule::makeBuffer(const void *mem, size_t length, StringRef name) {
178 const char *startPtr = (const char*)mem;
// The trailing `false` is getMemBuffer's third argument (presumably
// RequiresNullTerminator -- confirm for the LLVM revision in use).
179 return std::unique_ptr<MemoryBuffer>(
180 MemoryBuffer::getMemBuffer(StringRef(startPtr, length), name, false));
183 /// objcClassNameFromExpression - Get string that the data pointer points to.
///
/// Walks \p c as ConstantExpr -> operand 0 as GlobalVariable -> initializer as
/// ConstantDataArray. When that array is a C string, \p name is set to the
/// prefix .objc_class_name_ followed by the string. Callers in this file test
/// the result as a boolean.
185 LTOModule::objcClassNameFromExpression(const Constant *c, std::string &name) {
// Each dyn_cast step only proceeds when the value has the expected kind.
186 if (const ConstantExpr *ce = dyn_cast<ConstantExpr>(c)) {
187 Constant *op = ce->getOperand(0);
188 if (GlobalVariable *gvn = dyn_cast<GlobalVariable>(op)) {
189 Constant *cn = gvn->getInitializer();
190 if (ConstantDataArray *ca = dyn_cast<ConstantDataArray>(cn)) {
191 if (ca->isCString()) {
192 name = ".objc_class_name_" + ca->getAsCString().str();
201 /// addObjCClass - Parse i386/ppc ObjC class data structure.
///
/// Reads operands 1 and 2 of the initializer of \p clgv, viewed as a
/// ConstantStruct. The superclass name, when decodable, is recorded in
/// _undefines as an undefined symbol. The class name, when decodable, is
/// entered in _defines and appended to _symbols as a regular data definition
/// with default scope.
202 void LTOModule::addObjCClass(const GlobalVariable *clgv) {
203 const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
206 // second slot in __OBJC,__class is pointer to superclass name
207 std::string superclassName;
208 if (objcClassNameFromExpression(c->getOperand(1), superclassName)) {
209 NameAndAttributes info;
210 StringMap<NameAndAttributes>::value_type &entry =
211 _undefines.GetOrCreateValue(superclassName);
// Fill the entry in only if it has no name yet.
212 if (!entry.getValue().name) {
// info.name points at the key string held by the map entry.
213 const char *symbolName = entry.getKey().data();
214 info.name = symbolName;
215 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
216 info.isFunction = false;
218 entry.setValue(info);
222 // third slot in __OBJC,__class is pointer to class name
223 std::string className;
224 if (objcClassNameFromExpression(c->getOperand(2), className)) {
225 StringSet::value_type &entry = _defines.GetOrCreateValue(className);
228 NameAndAttributes info;
// As above, the stored name is the key string held by the _defines entry.
229 info.name = entry.getKey().data();
230 info.attributes = LTO_SYMBOL_PERMISSIONS_DATA |
231 LTO_SYMBOL_DEFINITION_REGULAR | LTO_SYMBOL_SCOPE_DEFAULT;
232 info.isFunction = false;
234 _symbols.push_back(info);
238 /// addObjCCategory - Parse i386/ppc ObjC category data structure.
///
/// Decodes the target class name from operand 1 of the initializer of
/// \p clgv, viewed as a ConstantStruct, and stores it in _undefines as an
/// undefined, non-function symbol.
239 void LTOModule::addObjCCategory(const GlobalVariable *clgv) {
240 const ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
243 // second slot in __OBJC,__category is pointer to target class name
244 std::string targetclassName;
245 if (!objcClassNameFromExpression(c->getOperand(1), targetclassName))
248 NameAndAttributes info;
249 StringMap<NameAndAttributes>::value_type &entry =
250 _undefines.GetOrCreateValue(targetclassName);
// An entry that already carries a name is tested for before the new
// attributes are stored.
252 if (entry.getValue().name)
// info.name points at the key string held by the map entry.
255 const char *symbolName = entry.getKey().data();
256 info.name = symbolName;
257 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
258 info.isFunction = false;
260 entry.setValue(info);
263 /// addObjCClassRef - Parse i386/ppc ObjC class list data structure.
///
/// Decodes a class name directly from the initializer of \p clgv and stores
/// it in _undefines as an undefined, non-function symbol.
264 void LTOModule::addObjCClassRef(const GlobalVariable *clgv) {
265 std::string targetclassName;
266 if (!objcClassNameFromExpression(clgv->getInitializer(), targetclassName))
269 NameAndAttributes info;
270 StringMap<NameAndAttributes>::value_type &entry =
271 _undefines.GetOrCreateValue(targetclassName);
// An entry that already carries a name is tested for before the new
// attributes are stored.
272 if (entry.getValue().name)
// info.name points at the key string held by the map entry.
275 const char *symbolName = entry.getKey().data();
276 info.name = symbolName;
277 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
278 info.isFunction = false;
280 entry.setValue(info);
/// addDefinedDataSymbol - Symbol-reference overload. Resolves \p Sym to its
/// GlobalValue through IRFile and forwards to the (Name, GlobalValue)
/// overload, passing the contents of Buffer as the name.
283 void LTOModule::addDefinedDataSymbol(const object::BasicSymbolRef &Sym) {
284 SmallString<64> Buffer;
// OS is a stream that writes into Buffer.
286 raw_svector_ostream OS(Buffer);
290 const GlobalValue *V = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
291 addDefinedDataSymbol(Buffer.c_str(), V);
/// addDefinedDataSymbol - Record \p v as a defined, non-function symbol named
/// \p Name, then inspect its section name for the legacy ObjC sections
/// __OBJC,__class, / __OBJC,__category, / __OBJC,__cls_refs, .
294 void LTOModule::addDefinedDataSymbol(const char *Name, const GlobalValue *v) {
295 // Add to list of defined symbols.
296 addDefinedSymbol(Name, v, false);
// Values without an explicit section need none of the ObjC handling below.
298 if (!v->hasSection() /* || !isTargetDarwin */)
301 // Special case i386/ppc ObjC data structures in magic sections:
302 // The issue is that the old ObjC object format did some strange
303 // contortions to avoid real linker symbols. For instance, the
304 // ObjC class data structure is allocated statically in the executable
305 // that defines that class. That data structure contains a pointer to
306 // its superclass. But instead of just initializing that part of the
307 // struct to the address of its superclass, and letting the static and
308 // dynamic linkers do the rest, the runtime works by having that field
309 // instead point to a C-string that is the name of the superclass.
310 // At runtime the objc initialization updates that pointer and sets
311 // it to point to the actual super class. As far as the linker
312 // knows it is just a pointer to a string. But then someone wanted the
313 // linker to issue errors at build time if the superclass was not found.
314 // So they figured out a way in mach-o object format to use absolute
315 // symbols (.objc_class_name_Foo = 0) and a floating reference
316 // (.reference .objc_class_name_Bar) to make the linker report an error when
317 // a class was missing.
318 // The following synthesizes the implicit .objc_* symbols for the linker
319 // from the ObjC data structures generated by the front end.
321 // special case if this data blob is an ObjC class definition
// Each compare(0, N, literal) == 0 is a prefix test; N is the literal length.
322 std::string Section = v->getSection();
323 if (Section.compare(0, 15, "__OBJC,__class,") == 0) {
324 if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
329 // special case if this data blob is an ObjC category definition
330 else if (Section.compare(0, 18, "__OBJC,__category,") == 0) {
331 if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
336 // special case if this data blob is the list of referenced classes
337 else if (Section.compare(0, 18, "__OBJC,__cls_refs,") == 0) {
338 if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
/// addDefinedFunctionSymbol - Symbol-reference overload. Resolves \p Sym to
/// its GlobalValue through IRFile, casts it to Function, and forwards to the
/// (Name, Function) overload, passing the contents of Buffer as the name.
344 void LTOModule::addDefinedFunctionSymbol(const object::BasicSymbolRef &Sym) {
345 SmallString<64> Buffer;
// OS is a stream that writes into Buffer.
347 raw_svector_ostream OS(Buffer);
// cast<> (unlike dyn_cast<>) assumes the value really is a Function.
352 cast<Function>(IRFile->getSymbolGV(Sym.getRawDataRefImpl()));
353 addDefinedFunctionSymbol(Buffer.c_str(), F);
/// addDefinedFunctionSymbol - Record \p F as a defined function symbol named
/// \p Name by calling addDefinedSymbol with its last argument set to true.
356 void LTOModule::addDefinedFunctionSymbol(const char *Name, const Function *F) {
357 // add to list of defined symbols
358 addDefinedSymbol(Name, F, true);
/// addDefinedSymbol - Compute the LTO attribute word for \p def and record
/// the symbol in _defines and _symbols under \p Name.
///
/// The attribute word combines the trailing-zero count of the alignment, a
/// permissions value, a definition kind chosen from the linkage, and a scope
/// chosen from linkage and visibility.
361 void LTOModule::addDefinedSymbol(const char *Name, const GlobalValue *def,
363 // set alignment part; log2() can have rounding errors, so the count of
// trailing zero bits is used instead (0 when no alignment is set)
364 uint32_t align = def->getAlignment();
365 uint32_t attr = align ? countTrailingZeros(align) : 0;
367 // set permissions part
369 attr |= LTO_SYMBOL_PERMISSIONS_CODE;
// Constant global variables are read-only data; other values are plain data.
371 const GlobalVariable *gv = dyn_cast<GlobalVariable>(def);
372 if (gv && gv->isConstant())
373 attr |= LTO_SYMBOL_PERMISSIONS_RODATA;
375 attr |= LTO_SYMBOL_PERMISSIONS_DATA;
378 // set definition part
379 if (def->hasWeakLinkage() || def->hasLinkOnceLinkage())
380 attr |= LTO_SYMBOL_DEFINITION_WEAK;
381 else if (def->hasCommonLinkage())
382 attr |= LTO_SYMBOL_DEFINITION_TENTATIVE;
384 attr |= LTO_SYMBOL_DEFINITION_REGULAR;
// Scope: local linkage wins, then hidden, then protected visibility, then
// the can-be-omitted check; default scope is the remaining case.
387 if (def->hasLocalLinkage())
388 // Ignore visibility if linkage is local.
389 attr |= LTO_SYMBOL_SCOPE_INTERNAL;
390 else if (def->hasHiddenVisibility())
391 attr |= LTO_SYMBOL_SCOPE_HIDDEN;
392 else if (def->hasProtectedVisibility())
393 attr |= LTO_SYMBOL_SCOPE_PROTECTED;
394 else if (canBeOmittedFromSymbolTable(def))
395 attr |= LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN;
397 attr |= LTO_SYMBOL_SCOPE_DEFAULT;
399 StringSet::value_type &entry = _defines.GetOrCreateValue(Name);
402 // fill information structure
403 NameAndAttributes info;
// info.name points at the key string held by the _defines entry; the assert
// checks that the key is NUL-terminated right after its last character.
404 StringRef NameRef = entry.getKey();
405 info.name = NameRef.data();
406 assert(info.name[NameRef.size()] == '\0');
407 info.attributes = attr;
408 info.isFunction = isFunction;
411 // add to table of symbols
412 _symbols.push_back(info);
415 /// addAsmGlobalSymbol - Add a global symbol from module-level ASM to the
/// defined list.
///
/// \param name  symbol name; entered in _defines.
/// \param scope scope bits applied to the resulting symbol.
417 void LTOModule::addAsmGlobalSymbol(const char *name,
418 lto_symbol_attributes scope) {
419 StringSet::value_type &entry = _defines.GetOrCreateValue(name);
421 // only add new define if not already defined
422 if (entry.getValue())
// Fetch the _undefines record for the same name. NOTE(review): with
// StringMap, operator[] presumably inserts a default record when the name is
// absent -- confirm.
427 NameAndAttributes &info = _undefines[entry.getKey().data()];
// No GlobalValue attached to the record: build the symbol from the asm
// information alone.
429 if (info.symbol == nullptr) {
430 // FIXME: This is trying to take care of module ASM like this:
432 // module asm ".zerofill __FOO, __foo, _bar_baz_qux, 0"
434 // but is gross and its mother dresses it funny. Have the ASM parser give us
435 // more details for this type of situation so that we're not guessing so
// much.
438 // fill information structure
439 info.name = entry.getKey().data();
441 LTO_SYMBOL_PERMISSIONS_DATA | LTO_SYMBOL_DEFINITION_REGULAR | scope;
442 info.isFunction = false;
443 info.symbol = nullptr;
445 // add to table of symbols
446 _symbols.push_back(info);
// A GlobalValue is attached: register it through the regular defined-symbol
// paths (function or data).
451 addDefinedFunctionSymbol(info.name, cast<Function>(info.symbol));
453 addDefinedDataSymbol(info.name, info.symbol);
// Replace the scope bits of the most recently appended symbol with \p scope.
455 _symbols.back().attributes &= ~LTO_SYMBOL_SCOPE_MASK;
456 _symbols.back().attributes |= scope;
459 /// addAsmGlobalSymbolUndef - Add a global symbol from module-level ASM to the
/// undefined list.
///
/// The name is entered in _undefines and its key pointer is always appended
/// to _asm_undefines; the attribute record is filled in after the check for
/// an already-named entry.
461 void LTOModule::addAsmGlobalSymbolUndef(const char *name) {
462 StringMap<NameAndAttributes>::value_type &entry =
463 _undefines.GetOrCreateValue(name);
// Appended before the already-known check, so repeated names are recorded
// here each time.
465 _asm_undefines.push_back(entry.getKey().data());
467 // we already have the symbol
468 if (entry.getValue().name)
471 uint32_t attr = LTO_SYMBOL_DEFINITION_UNDEFINED;
472 attr |= LTO_SYMBOL_SCOPE_DEFAULT;
473 NameAndAttributes info;
// info.name points at the key string held by the map entry.
474 info.name = entry.getKey().data();
475 info.attributes = attr;
476 info.isFunction = false;
// No GlobalValue is associated with a symbol that comes only from asm.
477 info.symbol = nullptr;
479 entry.setValue(info);
482 /// Add a symbol which isn't defined just yet to a list to be resolved later.
///
/// The symbol is entered in _undefines under the contents of `name`. Its
/// definition kind is WEAKUNDEF when the GlobalValue behind \p Sym has
/// external-weak linkage and UNDEFINED in the other visible assignment.
483 void LTOModule::addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym,
485 SmallString<64> name;
// OS is a stream that writes into `name`.
487 raw_svector_ostream OS(name);
491 StringMap<NameAndAttributes>::value_type &entry =
492 _undefines.GetOrCreateValue(name);
494 // we already have the symbol
495 if (entry.getValue().name)
498 NameAndAttributes info;
// info.name points at the key string held by the map entry.
500 info.name = entry.getKey().data();
502 const GlobalValue *decl = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
504 if (decl->hasExternalWeakLinkage())
505 info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF;
507 info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
509 info.isFunction = isFunc;
512 entry.setValue(info);
515 /// parseSymbols - Parse the symbols from the module and module-level ASM and add
516 /// them to either the defined or undefined lists.
///
/// Walks IRFile->symbols(), dispatching each symbol to one of the add*
/// helpers, then appends to _symbols every _undefines entry that has no
/// matching entry in _defines.
517 bool LTOModule::parseSymbols(std::string &errMsg) {
518 for (auto &Sym : IRFile->symbols()) {
519 const GlobalValue *GV = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
520 uint32_t Flags = Sym.getFlags();
// Format-specific symbols are tested for first.
521 if (Flags & object::BasicSymbolRef::SF_FormatSpecific)
524 bool IsUndefined = Flags & object::BasicSymbolRef::SF_Undefined;
// Asm-symbol path: the name is taken from Buffer, and the symbol is
// recorded as undefined, as global (default scope), or with internal scope.
527 SmallString<64> Buffer;
529 raw_svector_ostream OS(Buffer);
532 const char *Name = Buffer.c_str();
535 addAsmGlobalSymbolUndef(Name);
536 else if (Flags & object::BasicSymbolRef::SF_Global)
537 addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_DEFAULT);
539 addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_INTERNAL);
// IR-symbol path: F is non-null only when GV is a Function, which also
// decides the isFunc flag passed to addPotentialUndefinedSymbol.
543 auto *F = dyn_cast<Function>(GV);
545 addPotentialUndefinedSymbol(Sym, F != nullptr);
550 addDefinedFunctionSymbol(Sym);
554 if (isa<GlobalVariable>(GV)) {
555 addDefinedDataSymbol(Sym);
// Aliases are asserted to be the only remaining kind and are recorded
// through the data-symbol path.
559 assert(isa<GlobalAlias>(GV));
560 addDefinedDataSymbol(Sym);
563 // make symbols for all undefines
564 for (StringMap<NameAndAttributes>::iterator u =_undefines.begin(),
565 e = _undefines.end(); u != e; ++u) {
566 // If this symbol also has a definition, then don't make an undefine because
567 // it is a tentative definition.
568 if (_defines.count(u->getKey())) continue;
569 NameAndAttributes info = u->getValue();
570 _symbols.push_back(info);
576 /// parseMetadata - Parse metadata from the module
///
/// Reads the "Linker Options" module flag. Each option string is interned in
/// _linkeropt_strings and passed to the target object-file lowering's
/// getDepLibFromLinkerOpt. A non-empty result is stored in _deplibs;
/// otherwise a non-empty option is stored in _linkeropts.
577 void LTOModule::parseMetadata() {
579 if (Value *Val = getModule().getModuleFlag("Linker Options")) {
// The flag value is a node of nodes; each inner operand is one option string.
// cast<> assumes each operand has the expected kind.
580 MDNode *LinkerOptions = cast<MDNode>(Val);
581 for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) {
582 MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i));
583 for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) {
584 MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii));
// Op refers to the key stored in _linkeropt_strings, not to the metadata.
585 StringRef Op = _linkeropt_strings.
586 GetOrCreateValue(MDOption->getString()).getKey();
587 StringRef DepLibName = _target->getSubtargetImpl()
588 ->getTargetLowering()
589 ->getObjFileLowering()
590 .getDepLibFromLinkerOpt(Op);
// NOTE(review): only .data() pointers are stored, without lengths. Confirm
// that DepLibName ends where a NUL terminator follows.
591 if (!DepLibName.empty())
592 _deplibs.push_back(DepLibName.data());
593 else if (!Op.empty())
594 _linkeropts.push_back(Op.data());
599 // Add other interesting metadata here.