//===-- RuntimeDyld.cpp - Run-time dynamic linker for MC-JIT ----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Implementation of the MC-JIT runtime dynamic linker.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "dyld"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/Object/MachOObject.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Memory.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/system_error.h"
#include "llvm/Support/raw_ostream.h"
#include <cstring>
#include <string>
#include <utility>
using namespace llvm;
using namespace llvm::object;

32 // Empty out-of-line virtual destructor as the key function.
33 RTDyldMemoryManager::~RTDyldMemoryManager() {}
36 class RuntimeDyldImpl {
40 // The MemoryManager to load objects into.
41 RTDyldMemoryManager *MemMgr;
43 // Master symbol table. As modules are loaded and external symbols are
44 // resolved, their addresses are stored here.
45 StringMap<uint64_t> SymbolTable;
47 // FIXME: Should have multiple data blocks, one for each loaded chunk of
49 sys::MemoryBlock Data;
54 // Set the error state and record an error string.
55 bool Error(const Twine &Msg) {
61 bool resolveRelocation(uint32_t BaseSection, macho::RelocationEntry RE,
62 SmallVectorImpl<void *> &SectionBases,
63 SmallVectorImpl<StringRef> &SymbolNames);
64 bool resolveX86_64Relocation(intptr_t Address, intptr_t Value, bool isPCRel,
65 unsigned Type, unsigned Size);
66 bool resolveARMRelocation(intptr_t Address, intptr_t Value, bool isPCRel,
67 unsigned Type, unsigned Size);
69 bool loadSegment32(const MachOObject *Obj,
70 const MachOObject::LoadCommandInfo *SegmentLCI,
71 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
72 bool loadSegment64(const MachOObject *Obj,
73 const MachOObject::LoadCommandInfo *SegmentLCI,
74 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
77 RuntimeDyldImpl(RTDyldMemoryManager *mm) : MemMgr(mm), HasError(false) {}
79 bool loadObject(MemoryBuffer *InputBuffer);
81 uint64_t getSymbolAddress(StringRef Name) {
82 // Use lookup() rather than [] because we don't want to add an entry
83 // if there isn't one already, which the [] operator does.
84 return SymbolTable.lookup(Name);
87 sys::MemoryBlock getMemoryBlock() { return Data; }
89 // Is the linker in an error state?
90 bool hasError() { return HasError; }
92 // Mark the error condition as handled and continue.
93 void clearError() { HasError = false; }
95 // Get the error message.
96 StringRef getErrorString() { return ErrorStr; }
99 // FIXME: Relocations for targets other than x86_64.
100 bool RuntimeDyldImpl::
101 resolveRelocation(uint32_t BaseSection, macho::RelocationEntry RE,
102 SmallVectorImpl<void *> &SectionBases,
103 SmallVectorImpl<StringRef> &SymbolNames) {
104 // struct relocation_info {
105 // int32_t r_address;
106 // uint32_t r_symbolnum:24,
112 uint32_t SymbolNum = RE.Word1 & 0xffffff; // 24-bit value
113 bool isPCRel = (RE.Word1 >> 24) & 1;
114 unsigned Log2Size = (RE.Word1 >> 25) & 3;
115 bool isExtern = (RE.Word1 >> 27) & 1;
116 unsigned Type = (RE.Word1 >> 28) & 0xf;
117 if (RE.Word0 & macho::RF_Scattered)
118 return Error("NOT YET IMPLEMENTED: scattered relocations.");
120 // The address requiring a relocation.
121 intptr_t Address = (intptr_t)SectionBases[BaseSection] + RE.Word0;
123 // Figure out the target address of the relocation. If isExtern is true,
124 // this relocation references the symbol table, otherwise it references
125 // a section in the same object, numbered from 1 through NumSections
126 // (SectionBases is [0, NumSections-1]).
129 StringRef Name = SymbolNames[SymbolNum];
130 if (SymbolTable.lookup(Name)) {
131 // The symbol is in our symbol table, so we can resolve it directly.
132 Value = (intptr_t)SymbolTable[Name];
134 return Error("NOT YET IMPLEMENTED: relocations to pre-compiled code.");
136 DEBUG(dbgs() << "Resolve relocation(" << Type << ") from '" << Name
137 << "' to " << format("0x%x", Address) << ".\n");
139 // For non-external relocations, the SymbolNum is actual a section number
140 // as described above.
141 Value = (intptr_t)SectionBases[SymbolNum - 1];
144 unsigned Size = 1 << Log2Size;
146 default: assert(0 && "Unsupported CPU type!");
147 case mach::CTM_x86_64:
148 return resolveX86_64Relocation(Address, Value, isPCRel, Type, Size);
150 return resolveARMRelocation(Address, Value, isPCRel, Type, Size);
152 llvm_unreachable("");
155 bool RuntimeDyldImpl::resolveX86_64Relocation(intptr_t Address, intptr_t Value,
156 bool isPCRel, unsigned Type,
158 // If the relocation is PC-relative, the value to be encoded is the
159 // pointer difference.
161 // FIXME: It seems this value needs to be adjusted by 4 for an effective PC
162 // address. Is that expected? Only for branches, perhaps?
163 Value -= Address + 4;
167 llvm_unreachable("Invalid relocation type!");
168 case macho::RIT_X86_64_Unsigned:
169 case macho::RIT_X86_64_Branch: {
170 // Mask in the target value a byte at a time (we don't have an alignment
171 // guarantee for the target address, so this is safest).
172 uint8_t *p = (uint8_t*)Address;
173 for (unsigned i = 0; i < Size; ++i) {
174 *p++ = (uint8_t)Value;
179 case macho::RIT_X86_64_Signed:
180 case macho::RIT_X86_64_GOTLoad:
181 case macho::RIT_X86_64_GOT:
182 case macho::RIT_X86_64_Subtractor:
183 case macho::RIT_X86_64_Signed1:
184 case macho::RIT_X86_64_Signed2:
185 case macho::RIT_X86_64_Signed4:
186 case macho::RIT_X86_64_TLV:
187 return Error("Relocation type not implemented yet!");
192 bool RuntimeDyldImpl::resolveARMRelocation(intptr_t Address, intptr_t Value,
193 bool isPCRel, unsigned Type,
195 // If the relocation is PC-relative, the value to be encoded is the
196 // pointer difference.
199 // ARM PCRel relocations have an effective-PC offset of two instructions
200 // (four bytes in Thumb mode, 8 bytes in ARM mode).
201 // FIXME: For now, assume ARM mode.
207 case macho::RIT_Vanilla: {
208 llvm_unreachable("Invalid relocation type!");
209 // Mask in the target value a byte at a time (we don't have an alignment
210 // guarantee for the target address, so this is safest).
211 uint8_t *p = (uint8_t*)Address;
212 for (unsigned i = 0; i < Size; ++i) {
213 *p++ = (uint8_t)Value;
218 case macho::RIT_Pair:
219 case macho::RIT_Difference:
220 case macho::RIT_ARM_LocalDifference:
221 case macho::RIT_ARM_PreboundLazyPointer:
222 case macho::RIT_ARM_Branch24Bit: {
223 // Mask the value into the target address. We know instructions are
224 // 32-bit aligned, so we can do it all at once.
225 uint32_t *p = (uint32_t*)Address;
226 // The low two bits of the value are not encoded.
228 // Mask the value to 24 bits.
230 // FIXME: If the destination is a Thumb function (and the instruction
231 // is a non-predicated BL instruction), we need to change it to a BLX
232 // instruction instead.
234 // Insert the value into the instruction.
235 *p = (*p & ~0xffffff) | Value;
238 case macho::RIT_ARM_ThumbBranch22Bit:
239 case macho::RIT_ARM_ThumbBranch32Bit:
240 case macho::RIT_ARM_Half:
241 case macho::RIT_ARM_HalfDifference:
242 return Error("Relocation type not implemented yet!");
247 bool RuntimeDyldImpl::
248 loadSegment32(const MachOObject *Obj,
249 const MachOObject::LoadCommandInfo *SegmentLCI,
250 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
251 InMemoryStruct<macho::SegmentLoadCommand> Segment32LC;
252 Obj->ReadSegmentLoadCommand(*SegmentLCI, Segment32LC);
254 return Error("unable to load segment load command");
256 // Map the segment into memory.
257 std::string ErrorStr;
258 Data = sys::Memory::AllocateRWX(Segment32LC->VMSize, 0, &ErrorStr);
260 return Error("unable to allocate memory block: '" + ErrorStr + "'");
261 memcpy(Data.base(), Obj->getData(Segment32LC->FileOffset,
262 Segment32LC->FileSize).data(),
263 Segment32LC->FileSize);
264 memset((char*)Data.base() + Segment32LC->FileSize, 0,
265 Segment32LC->VMSize - Segment32LC->FileSize);
267 // Bind the section indices to addresses and record the relocations we
269 typedef std::pair<uint32_t, macho::RelocationEntry> RelocationMap;
270 SmallVector<RelocationMap, 64> Relocations;
272 SmallVector<void *, 16> SectionBases;
273 for (unsigned i = 0; i != Segment32LC->NumSections; ++i) {
274 InMemoryStruct<macho::Section> Sect;
275 Obj->ReadSection(*SegmentLCI, i, Sect);
277 return Error("unable to load section: '" + Twine(i) + "'");
279 // Remember any relocations the section has so we can resolve them later.
280 for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
281 InMemoryStruct<macho::RelocationEntry> RE;
282 Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
283 Relocations.push_back(RelocationMap(j, *RE));
286 // FIXME: Improve check.
287 // if (Sect->Flags != 0x80000400)
288 // return Error("unsupported section type!");
290 SectionBases.push_back((char*) Data.base() + Sect->Address);
293 // Bind all the symbols to address. Keep a record of the names for use
294 // by relocation resolution.
295 SmallVector<StringRef, 64> SymbolNames;
296 for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
297 InMemoryStruct<macho::SymbolTableEntry> STE;
298 Obj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE);
300 return Error("unable to read symbol: '" + Twine(i) + "'");
301 // Get the symbol name.
302 StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
303 SymbolNames.push_back(Name);
305 // Just skip undefined symbols. They'll be loaded from whatever
306 // module they come from (or system dylib) when we resolve relocations
308 if (STE->SectionIndex == 0)
311 unsigned Index = STE->SectionIndex - 1;
312 if (Index >= Segment32LC->NumSections)
313 return Error("invalid section index for symbol: '" + Twine() + "'");
315 // Get the section base address.
316 void *SectionBase = SectionBases[Index];
318 // Get the symbol address.
319 uint64_t Address = (uint64_t)SectionBase + STE->Value;
321 // FIXME: Check the symbol type and flags.
322 if (STE->Type != 0xF)
323 return Error("unexpected symbol type!");
324 if (STE->Flags != 0x0)
325 return Error("unexpected symbol type!");
327 DEBUG(dbgs() << "Symbol: '" << Name << "' @ " << Address << "\n");
329 SymbolTable[Name] = Address;
332 // Now resolve any relocations.
333 for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
334 if (resolveRelocation(Relocations[i].first, Relocations[i].second,
335 SectionBases, SymbolNames))
339 // We've loaded the section; now mark the functions in it as executable.
340 // FIXME: We really should use the MemoryManager for this.
341 sys::Memory::setRangeExecutable(Data.base(), Data.size());
347 bool RuntimeDyldImpl::
348 loadSegment64(const MachOObject *Obj,
349 const MachOObject::LoadCommandInfo *SegmentLCI,
350 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
351 InMemoryStruct<macho::Segment64LoadCommand> Segment64LC;
352 Obj->ReadSegment64LoadCommand(*SegmentLCI, Segment64LC);
354 return Error("unable to load segment load command");
356 // Map the segment into memory.
357 std::string ErrorStr;
358 Data = sys::Memory::AllocateRWX(Segment64LC->VMSize, 0, &ErrorStr);
360 return Error("unable to allocate memory block: '" + ErrorStr + "'");
361 memcpy(Data.base(), Obj->getData(Segment64LC->FileOffset,
362 Segment64LC->FileSize).data(),
363 Segment64LC->FileSize);
364 memset((char*)Data.base() + Segment64LC->FileSize, 0,
365 Segment64LC->VMSize - Segment64LC->FileSize);
367 // Bind the section indices to addresses and record the relocations we
369 typedef std::pair<uint32_t, macho::RelocationEntry> RelocationMap;
370 SmallVector<RelocationMap, 64> Relocations;
372 SmallVector<void *, 16> SectionBases;
373 for (unsigned i = 0; i != Segment64LC->NumSections; ++i) {
374 InMemoryStruct<macho::Section64> Sect;
375 Obj->ReadSection64(*SegmentLCI, i, Sect);
377 return Error("unable to load section: '" + Twine(i) + "'");
379 // Remember any relocations the section has so we can resolve them later.
380 for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
381 InMemoryStruct<macho::RelocationEntry> RE;
382 Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
383 Relocations.push_back(RelocationMap(j, *RE));
386 // FIXME: Improve check.
387 if (Sect->Flags != 0x80000400)
388 return Error("unsupported section type!");
390 SectionBases.push_back((char*) Data.base() + Sect->Address);
393 // Bind all the symbols to address. Keep a record of the names for use
394 // by relocation resolution.
395 SmallVector<StringRef, 64> SymbolNames;
396 for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
397 InMemoryStruct<macho::Symbol64TableEntry> STE;
398 Obj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE);
400 return Error("unable to read symbol: '" + Twine(i) + "'");
401 // Get the symbol name.
402 StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
403 SymbolNames.push_back(Name);
405 // Just skip undefined symbols. They'll be loaded from whatever
406 // module they come from (or system dylib) when we resolve relocations
408 if (STE->SectionIndex == 0)
411 unsigned Index = STE->SectionIndex - 1;
412 if (Index >= Segment64LC->NumSections)
413 return Error("invalid section index for symbol: '" + Twine() + "'");
415 // Get the section base address.
416 void *SectionBase = SectionBases[Index];
418 // Get the symbol address.
419 uint64_t Address = (uint64_t) SectionBase + STE->Value;
421 // FIXME: Check the symbol type and flags.
422 if (STE->Type != 0xF)
423 return Error("unexpected symbol type!");
424 if (STE->Flags != 0x0)
425 return Error("unexpected symbol type!");
427 DEBUG(dbgs() << "Symbol: '" << Name << "' @ " << Address << "\n");
428 SymbolTable[Name] = Address;
431 // Now resolve any relocations.
432 for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
433 if (resolveRelocation(Relocations[i].first, Relocations[i].second,
434 SectionBases, SymbolNames))
438 // We've loaded the section; now mark the functions in it as executable.
439 // FIXME: We really should use the MemoryManager for this.
440 sys::Memory::setRangeExecutable(Data.base(), Data.size());
445 bool RuntimeDyldImpl::loadObject(MemoryBuffer *InputBuffer) {
446 // If the linker is in an error state, don't do anything.
449 // Load the Mach-O wrapper object.
450 std::string ErrorStr;
451 OwningPtr<MachOObject> Obj(
452 MachOObject::LoadFromBuffer(InputBuffer, &ErrorStr));
454 return Error("unable to load object: '" + ErrorStr + "'");
456 // Get the CPU type information from the header.
457 const macho::Header &Header = Obj->getHeader();
459 // FIXME: Error checking that the loaded object is compatible with
460 // the system we're running on.
461 CPUType = Header.CPUType;
462 CPUSubtype = Header.CPUSubtype;
464 // Validate that the load commands match what we expect.
465 const MachOObject::LoadCommandInfo *SegmentLCI = 0, *SymtabLCI = 0,
467 for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
468 const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i);
469 switch (LCI.Command.Type) {
470 case macho::LCT_Segment:
471 case macho::LCT_Segment64:
473 return Error("unexpected input object (multiple segments)");
476 case macho::LCT_Symtab:
478 return Error("unexpected input object (multiple symbol tables)");
481 case macho::LCT_Dysymtab:
483 return Error("unexpected input object (multiple symbol tables)");
487 return Error("unexpected input object (unexpected load command");
492 return Error("no symbol table found in object");
494 return Error("no symbol table found in object");
496 // Read and register the symbol table data.
497 InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
498 Obj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC);
500 return Error("unable to load symbol table load command");
501 Obj->RegisterStringTable(*SymtabLC);
503 // Read the dynamic link-edit information, if present (not present in static
506 InMemoryStruct<macho::DysymtabLoadCommand> DysymtabLC;
507 Obj->ReadDysymtabLoadCommand(*DysymtabLCI, DysymtabLC);
509 return Error("unable to load dynamic link-exit load command");
511 // FIXME: We don't support anything interesting yet.
512 // if (DysymtabLC->LocalSymbolsIndex != 0)
513 // return Error("NOT YET IMPLEMENTED: local symbol entries");
514 // if (DysymtabLC->ExternalSymbolsIndex != 0)
515 // return Error("NOT YET IMPLEMENTED: non-external symbol entries");
516 // if (DysymtabLC->UndefinedSymbolsIndex != SymtabLC->NumSymbolTableEntries)
517 // return Error("NOT YET IMPLEMENTED: undefined symbol entries");
520 // Load the segment load command.
521 if (SegmentLCI->Command.Type == macho::LCT_Segment) {
522 if (loadSegment32(Obj.get(), SegmentLCI, SymtabLC))
525 if (loadSegment64(Obj.get(), SegmentLCI, SymtabLC))
//===----------------------------------------------------------------------===//
// RuntimeDyld class implementation
535 RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *MM) {
536 Dyld = new RuntimeDyldImpl(MM);
539 RuntimeDyld::~RuntimeDyld() {
543 bool RuntimeDyld::loadObject(MemoryBuffer *InputBuffer) {
544 return Dyld->loadObject(InputBuffer);
547 uint64_t RuntimeDyld::getSymbolAddress(StringRef Name) {
548 return Dyld->getSymbolAddress(Name);
551 sys::MemoryBlock RuntimeDyld::getMemoryBlock() {
552 return Dyld->getMemoryBlock();
555 StringRef RuntimeDyld::getErrorString() {
556 return Dyld->getErrorString();
} // end namespace llvm