1 //===-- RuntimeDyld.cpp - Run-time dynamic linker for MC-JIT ----*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // Implementation of the MC-JIT runtime dynamic linker.
12 //===----------------------------------------------------------------------===//
14 #define DEBUG_TYPE "dyld"
15 #include "llvm/ADT/OwningPtr.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/StringMap.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/Twine.h"
20 #include "llvm/ExecutionEngine/RuntimeDyld.h"
21 #include "llvm/Object/MachOObject.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/Format.h"
25 #include "llvm/Support/Memory.h"
26 #include "llvm/Support/MemoryBuffer.h"
27 #include "llvm/Support/system_error.h"
28 #include "llvm/Support/raw_ostream.h"
30 using namespace llvm::object;
// Private implementation behind the public RuntimeDyld facade. It keeps the
// table of resolved symbol addresses, the memory block that segment contents
// are copied into, and the error flag/message pair exposed through
// hasError(), clearError() and getErrorString().
33 class RuntimeDyldImpl {
37 // Master symbol table. As modules are loaded and external symbols are
38 // resolved, their addresses are stored here.
39 StringMap<void*> SymbolTable;
41 // FIXME: Should have multiple data blocks, one for each loaded chunk of
//        code. For now there is a single block, and each segment load
//        assigns a freshly allocated block to Data.
43 sys::MemoryBlock Data;
48 // Set the error state and record an error string.
// Call sites use the return value directly as their own result
// ("return Error(...)").
49 bool Error(const Twine &Msg) {
// Relocation handling. resolveRelocation() decodes one Mach-O relocation
// entry and hands the computed address/value pair to the CPU-specific
// routine declared after it.
55 bool resolveRelocation(uint32_t BaseSection, macho::RelocationEntry RE,
56 SmallVectorImpl<void *> &SectionBases,
57 SmallVectorImpl<StringRef> &SymbolNames);
58 bool resolveX86_64Relocation(intptr_t Address, intptr_t Value, bool isPCRel,
59 unsigned Type, unsigned Size);
60 bool resolveARMRelocation(intptr_t Address, intptr_t Value, bool isPCRel,
61 unsigned Type, unsigned Size);
// Segment loaders for 32-bit and 64-bit segment load commands. Both copy the
// segment into Data and register its symbols in SymbolTable.
63 bool loadSegment32(const MachOObject *Obj,
64 const MachOObject::LoadCommandInfo *SegmentLCI,
65 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
66 bool loadSegment64(const MachOObject *Obj,
67 const MachOObject::LoadCommandInfo *SegmentLCI,
68 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
// Start out with no error recorded.
71 RuntimeDyldImpl() : HasError(false) {}
// Load the Mach-O object held in InputBuffer.
73 bool loadObject(MemoryBuffer *InputBuffer);
// Return the address recorded for Name in SymbolTable.
// NOTE(review): for an unknown name lookup() presumably yields a null
// pointer (default-constructed value) - confirm against StringMap.
75 void *getSymbolAddress(StringRef Name) {
76 // Use lookup() rather than [] because we don't want to add an entry
77 // if there isn't one already, which the [] operator does.
78 return SymbolTable.lookup(Name);
// Return (by value) the descriptor of the block that holds the loaded code.
81 sys::MemoryBlock getMemoryBlock() { return Data; }
83 // Is the linker in an error state?
84 bool hasError() { return HasError; }
86 // Mark the error condition as handled and continue.
87 void clearError() { HasError = false; }
89 // Get the error message.
// The result is a StringRef onto the stored ErrorStr, not a copy.
90 StringRef getErrorString() { return ErrorStr; }
93 // FIXME: Relocations for targets other than x86_64.
//
// Resolve a single Mach-O relocation entry.
//   BaseSection  - index into SectionBases of the section being patched.
//   RE           - the raw entry; Word0 supplies the offset added to the
//                  section base (and the scattered flag), Word1 the packed
//                  symbol number, pc-relative bit, log2 size, extern bit and
//                  relocation type decoded below.
//   SectionBases - load address of each section, indexed from zero.
//   SymbolNames  - symbol names indexed by symbol number.
// Returns whatever the CPU-specific resolver returns, or the result of
// Error() for the cases that are not implemented.
// NOTE(review): neither BaseSection nor SymbolNum is range-checked before it
// is used as an index - confirm the callers guarantee valid values.
94 bool RuntimeDyldImpl::
95 resolveRelocation(uint32_t BaseSection, macho::RelocationEntry RE,
96 SmallVectorImpl<void *> &SectionBases,
97 SmallVectorImpl<StringRef> &SymbolNames) {
98 // struct relocation_info {
100 // uint32_t r_symbolnum:24,
// Unpack the bit-fields of the second relocation word.
106 uint32_t SymbolNum = RE.Word1 & 0xffffff; // 24-bit value
107 bool isPCRel = (RE.Word1 >> 24) & 1;
108 unsigned Log2Size = (RE.Word1 >> 25) & 3;
109 bool isExtern = (RE.Word1 >> 27) & 1;
110 unsigned Type = (RE.Word1 >> 28) & 0xf;
// Scattered entries use a different layout and are rejected outright.
111 if (RE.Word0 & macho::RF_Scattered)
112 return Error("NOT YET IMPLEMENTED: scattered relocations.");
114 // The address requiring a relocation.
115 intptr_t Address = (intptr_t)SectionBases[BaseSection] + RE.Word0;
117 // Figure out the target address of the relocation. If isExtern is true,
118 // this relocation references the symbol table, otherwise it references
119 // a section in the same object, numbered from 1 through NumSections
120 // (SectionBases is [0, NumSections-1]).
123 StringRef Name = SymbolNames[SymbolNum];
// NOTE(review): lookup() followed by operator[] hashes Name twice; the
// pointer returned by lookup() could be reused for Value.
124 if (SymbolTable.lookup(Name)) {
125 // The symbol is in our symbol table, so we can resolve it directly.
126 Value = (intptr_t)SymbolTable[Name];
128 return Error("NOT YET IMPLEMENTED: relocations to pre-compiled code.");
// NOTE(review): Address is an intptr_t but is printed with "0x%x", which
// expects an unsigned int; on 64-bit hosts this looks like a format/argument
// mismatch - confirm and widen the specifier.
130 DEBUG(dbgs() << "Resolve relocation(" << Type << ") from '" << Name
131 << "' to " << format("0x%x", Address) << ".\n");
133 // For non-external relocations, the SymbolNum is actually a section number
134 // as described above.
// NOTE(review): a SymbolNum of 0 would index SectionBases at (uint32_t)-1.
135 Value = (intptr_t)SectionBases[SymbolNum - 1];
// Log2Size is a two-bit field, so Size is 1, 2, 4 or 8 bytes.
138 unsigned Size = 1 << Log2Size;
// Dispatch to the CPU-specific resolver.
140 default: assert(0 && "Unsupported CPU type!");
141 case mach::CTM_x86_64:
142 return resolveX86_64Relocation(Address, Value, isPCRel, Type, Size);
144 return resolveARMRelocation(Address, Value, isPCRel, Type, Size);
146 llvm_unreachable("");
// Apply an x86-64 relocation by writing Value into memory at Address.
//   Address - location to patch.
//   Value   - target value; adjusted relative to Address for PC-relative
//             relocations.
//   isPCRel - whether the relocation is PC-relative.
//   Type    - Mach-O x86-64 relocation type (macho::RIT_X86_64_*).
//   Size    - number of bytes stored at Address.
// Only the Unsigned and Branch types are applied; the remaining listed types
// return the result of Error("Relocation type not implemented yet!").
149 bool RuntimeDyldImpl::resolveX86_64Relocation(intptr_t Address, intptr_t Value,
150 bool isPCRel, unsigned Type,
152 // If the relocation is PC-relative, the value to be encoded is the
153 // pointer difference.
155 // FIXME: It seems this value needs to be adjusted by 4 for an effective PC
156 // address. Is that expected? Only for branches, perhaps?
157 Value -= Address + 4;
161 llvm_unreachable("Invalid relocation type!");
162 case macho::RIT_X86_64_Unsigned:
163 case macho::RIT_X86_64_Branch: {
164 // Mask in the target value a byte at a time (we don't have an alignment
165 // guarantee for the target address, so this is safest).
166 uint8_t *p = (uint8_t*)Address;
167 for (unsigned i = 0; i < Size; ++i) {
// Store the current low byte of Value and advance to the next byte.
168 *p++ = (uint8_t)Value;
// Relocation types that are recognised but not handled yet.
173 case macho::RIT_X86_64_Signed:
174 case macho::RIT_X86_64_GOTLoad:
175 case macho::RIT_X86_64_GOT:
176 case macho::RIT_X86_64_Subtractor:
177 case macho::RIT_X86_64_Signed1:
178 case macho::RIT_X86_64_Signed2:
179 case macho::RIT_X86_64_Signed4:
180 case macho::RIT_X86_64_TLV:
181 return Error("Relocation type not implemented yet!");
// ARM counterpart of the x86-64 resolver; takes the same parameters
// (Address to patch, Value to encode, PC-relative flag, Mach-O relocation
// Type, Size in bytes).
// The non-Vanilla types listed below return the result of
// Error("Relocation type not implemented yet!").
186 bool RuntimeDyldImpl::resolveARMRelocation(intptr_t Address, intptr_t Value,
187 bool isPCRel, unsigned Type,
189 // If the relocation is PC-relative, the value to be encoded is the
190 // pointer difference.
193 // ARM PCRel relocations have an effective-PC offset of two instructions
194 // (four bytes in Thumb mode, 8 bytes in ARM mode).
195 // FIXME: For now, assume ARM mode.
201 case macho::RIT_Vanilla: {
// NOTE(review): this llvm_unreachable sits at the top of the RIT_Vanilla
// case, so the byte-store code that follows it can never run. Confirm
// whether Vanilla relocations are meant to be rejected or whether this
// statement belongs to a default label only.
202 llvm_unreachable("Invalid relocation type!");
203 // Mask in the target value a byte at a time (we don't have an alignment
204 // guarantee for the target address, so this is safest).
205 uint8_t *p = (uint8_t*)Address;
206 for (unsigned i = 0; i < Size; ++i) {
// Store the current low byte of Value and advance to the next byte.
207 *p++ = (uint8_t)Value;
// Relocation types that are recognised but not handled yet.
212 case macho::RIT_Pair:
213 case macho::RIT_Difference:
214 case macho::RIT_ARM_LocalDifference:
215 case macho::RIT_ARM_PreboundLazyPointer:
216 case macho::RIT_ARM_Branch24Bit:
217 case macho::RIT_ARM_ThumbBranch22Bit:
218 case macho::RIT_ARM_ThumbBranch32Bit:
219 case macho::RIT_ARM_Half:
220 case macho::RIT_ARM_HalfDifference:
221 return Error("Relocation type not implemented yet!");
// Load a 32-bit segment: copy its contents into a freshly allocated block,
// compute a base address for every section, enter every symbol into
// SymbolTable and finally mark the block executable.
//   Obj        - the parsed Mach-O object.
//   SegmentLCI - load command info for the segment to load.
//   SymtabLC   - the already-read symbol table load command.
// Every failure path returns the result of Error() with a diagnostic.
// Sections carrying relocations are rejected here.
226 bool RuntimeDyldImpl::
227 loadSegment32(const MachOObject *Obj,
228 const MachOObject::LoadCommandInfo *SegmentLCI,
229 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
230 InMemoryStruct<macho::SegmentLoadCommand> Segment32LC;
231 Obj->ReadSegmentLoadCommand(*SegmentLCI, Segment32LC);
233 return Error("unable to load segment load command");
235 // Map the segment into memory.
// NOTE(review): this local ErrorStr shadows the member of the same name
// that getErrorString() returns; it only receives the allocator's message.
236 std::string ErrorStr;
237 Data = sys::Memory::AllocateRWX(Segment32LC->VMSize, 0, &ErrorStr);
239 return Error("unable to allocate memory block: '" + ErrorStr + "'");
// Copy the FileSize bytes stored in the object, then zero-fill the rest of
// the VMSize-byte block.
// NOTE(review): no visible check that FileSize <= VMSize; if it were larger
// the subtraction in the memset would wrap - confirm.
240 memcpy(Data.base(), Obj->getData(Segment32LC->FileOffset,
241 Segment32LC->FileSize).data(),
242 Segment32LC->FileSize);
243 memset((char*)Data.base() + Segment32LC->FileSize, 0,
244 Segment32LC->VMSize - Segment32LC->FileSize);
246 // Bind the section indices to address.
247 SmallVector<void *, 16> SectionBases;
248 for (unsigned i = 0; i != Segment32LC->NumSections; ++i) {
249 InMemoryStruct<macho::Section> Sect;
250 Obj->ReadSection(*SegmentLCI, i, Sect);
252 return Error("unable to load section: '" + Twine(i) + "'");
254 // FIXME: We don't support relocations yet.
255 if (Sect->NumRelocationTableEntries != 0)
256 return Error("not yet implemented: relocations!");
258 // FIXME: Improve check.
// NOTE(review): 0x80000400 is presumably the flag word of a regular code
// section; any other value is rejected - confirm against the Mach-O spec.
259 if (Sect->Flags != 0x80000400)
260 return Error("unsupported section type!");
// The section's Address field is used as an offset from the block base.
262 SectionBases.push_back((char*) Data.base() + Sect->Address);
265 // Bind all the symbols to address.
266 for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
267 InMemoryStruct<macho::SymbolTableEntry> STE;
268 Obj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE);
270 return Error("unable to read symbol: '" + Twine(i) + "'");
// Section index 0 means the symbol is undefined, which is not supported
// by this loader.
271 if (STE->SectionIndex == 0)
272 return Error("unexpected undefined symbol!");
// Symbol section indices are one-based; SectionBases is zero-based.
274 unsigned Index = STE->SectionIndex - 1;
275 if (Index >= Segment32LC->NumSections)
// NOTE(review): Twine() is empty, so the message always ends in "''".
// The loop index i (as in the other diagnostics) looks intended - confirm.
276 return Error("invalid section index for symbol: '" + Twine() + "'");
278 // Get the symbol name.
279 StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
281 // Get the section base address.
282 void *SectionBase = SectionBases[Index];
284 // Get the symbol address.
285 void *Address = (char*) SectionBase + STE->Value;
287 // FIXME: Check the symbol type and flags.
288 if (STE->Type != 0xF)
289 return Error("unexpected symbol type!");
// NOTE(review): the flags check reports "unexpected symbol type!", the same
// text as the type check above; a flags-specific message may be intended.
290 if (STE->Flags != 0x0)
291 return Error("unexpected symbol type!");
293 DEBUG(dbgs() << "Symbol: '" << Name << "' @ " << Address << "\n");
// operator[] inserts or overwrites, so a later symbol of the same name
// replaces an earlier entry.
295 SymbolTable[Name] = Address;
298 // We've loaded the section; now mark the functions in it as executable.
299 // FIXME: We really should use the JITMemoryManager for this.
300 sys::Memory::setRangeExecutable(Data.base(), Data.size());
// Load a 64-bit segment: copy its contents into a freshly allocated block,
// compute section base addresses while collecting each section's relocation
// entries, enter defined symbols into SymbolTable, resolve the collected
// relocations and finally mark the block executable.
//   Obj        - the parsed Mach-O object.
//   SegmentLCI - load command info for the segment to load.
//   SymtabLC   - the already-read symbol table load command.
// Every failure path returns the result of Error() with a diagnostic.
306 bool RuntimeDyldImpl::
307 loadSegment64(const MachOObject *Obj,
308 const MachOObject::LoadCommandInfo *SegmentLCI,
309 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
310 InMemoryStruct<macho::Segment64LoadCommand> Segment64LC;
311 Obj->ReadSegment64LoadCommand(*SegmentLCI, Segment64LC);
313 return Error("unable to load segment load command");
315 // Map the segment into memory.
// NOTE(review): this local ErrorStr shadows the member of the same name
// that getErrorString() returns; it only receives the allocator's message.
316 std::string ErrorStr;
317 Data = sys::Memory::AllocateRWX(Segment64LC->VMSize, 0, &ErrorStr);
319 return Error("unable to allocate memory block: '" + ErrorStr + "'");
// Copy the FileSize bytes stored in the object, then zero-fill the rest of
// the VMSize-byte block.
// NOTE(review): no visible check that FileSize <= VMSize; if it were larger
// the subtraction in the memset would wrap - confirm.
320 memcpy(Data.base(), Obj->getData(Segment64LC->FileOffset,
321 Segment64LC->FileSize).data(),
322 Segment64LC->FileSize);
323 memset((char*)Data.base() + Segment64LC->FileSize, 0,
324 Segment64LC->VMSize - Segment64LC->FileSize);
326 // Bind the section indices to addresses and record the relocations we
// need to resolve once all symbols have been bound.
328 typedef std::pair<uint32_t, macho::RelocationEntry> RelocationMap;
329 SmallVector<RelocationMap, 64> Relocations;
331 SmallVector<void *, 16> SectionBases;
332 for (unsigned i = 0; i != Segment64LC->NumSections; ++i) {
333 InMemoryStruct<macho::Section64> Sect;
334 Obj->ReadSection64(*SegmentLCI, i, Sect);
336 return Error("unable to load section: '" + Twine(i) + "'");
338 // Record the section's relocations; they are resolved after the symbols.
339 for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
340 InMemoryStruct<macho::RelocationEntry> RE;
341 Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
// NOTE(review): the first pair member is later passed to
// resolveRelocation() as BaseSection, an index into SectionBases, yet the
// relocation index j is stored here. The section index i looks like what
// was intended - confirm.
342 Relocations.push_back(RelocationMap(j, *RE));
345 // FIXME: Improve check.
// NOTE(review): 0x80000400 is presumably the flag word of a regular code
// section; any other value is rejected - confirm against the Mach-O spec.
346 if (Sect->Flags != 0x80000400)
347 return Error("unsupported section type!");
// The section's Address field is used as an offset from the block base.
349 SectionBases.push_back((char*) Data.base() + Sect->Address);
352 // Bind all the symbols to address. Keep a record of the names for use
353 // by relocation resolution.
354 SmallVector<StringRef, 64> SymbolNames;
355 for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
356 InMemoryStruct<macho::Symbol64TableEntry> STE;
357 Obj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE);
359 return Error("unable to read symbol: '" + Twine(i) + "'");
360 // Get the symbol name.
361 StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
// Names are recorded for every entry, defined or not, so that SymbolNames
// can be indexed by symbol number during relocation.
362 SymbolNames.push_back(Name);
364 // Just skip undefined symbols. They'll be loaded from whatever
365 // module they come from (or system dylib) when we resolve relocations
// that refer to them.
367 if (STE->SectionIndex == 0)
// Symbol section indices are one-based; SectionBases is zero-based.
370 unsigned Index = STE->SectionIndex - 1;
371 if (Index >= Segment64LC->NumSections)
// NOTE(review): Twine() is empty, so the message always ends in "''".
// The loop index i (as in the other diagnostics) looks intended - confirm.
372 return Error("invalid section index for symbol: '" + Twine() + "'");
374 // Get the section base address.
375 void *SectionBase = SectionBases[Index];
377 // Get the symbol address.
378 void *Address = (char*) SectionBase + STE->Value;
380 // FIXME: Check the symbol type and flags.
381 if (STE->Type != 0xF)
382 return Error("unexpected symbol type!");
// NOTE(review): the flags check reports "unexpected symbol type!", the same
// text as the type check above; a flags-specific message may be intended.
383 if (STE->Flags != 0x0)
384 return Error("unexpected symbol type!");
386 DEBUG(dbgs() << "Symbol: '" << Name << "' @ " << Address << "\n");
// operator[] inserts or overwrites, so a later symbol of the same name
// replaces an earlier entry.
387 SymbolTable[Name] = Address;
390 // Now resolve any relocations.
391 for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
392 if (resolveRelocation(Relocations[i].first, Relocations[i].second,
393 SectionBases, SymbolNames))
397 // We've loaded the section; now mark the functions in it as executable.
398 // FIXME: We really should use the JITMemoryManager for this.
399 sys::Memory::setRangeExecutable(Data.base(), Data.size());
// Load a Mach-O object from InputBuffer: parse it, record its CPU type and
// subtype, scan the load commands for the segment, symbol table and dynamic
// symbol table commands, register the string table and then hand the segment
// to loadSegment32() or loadSegment64() depending on the command type.
// Every failure path returns the result of Error() with a diagnostic.
404 bool RuntimeDyldImpl::loadObject(MemoryBuffer *InputBuffer) {
405 // If the linker is in an error state, don't do anything.
408 // Load the Mach-O wrapper object.
// NOTE(review): this local ErrorStr shadows the member of the same name
// that getErrorString() returns; it only receives the parser's message.
409 std::string ErrorStr;
410 OwningPtr<MachOObject> Obj(
411 MachOObject::LoadFromBuffer(InputBuffer, &ErrorStr));
413 return Error("unable to load object: '" + ErrorStr + "'");
415 // Get the CPU type information from the header.
416 const macho::Header &Header = Obj->getHeader();
418 // FIXME: Error checking that the loaded object is compatible with
419 // the system we're running on.
420 CPUType = Header.CPUType;
421 CPUSubtype = Header.CPUSubtype;
423 // Validate that the load commands match what we expect.
// 32-bit and 64-bit segment commands share one case, so only a single
// segment command of either kind is accepted.
424 const MachOObject::LoadCommandInfo *SegmentLCI = 0, *SymtabLCI = 0,
426 for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
427 const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i);
428 switch (LCI.Command.Type) {
429 case macho::LCT_Segment:
430 case macho::LCT_Segment64:
432 return Error("unexpected input object (multiple segments)");
435 case macho::LCT_Symtab:
437 return Error("unexpected input object (multiple symbol tables)");
// NOTE(review): the Dysymtab duplicate is reported with the same "multiple
// symbol tables" text as the Symtab case above.
440 case macho::LCT_Dysymtab:
442 return Error("unexpected input object (multiple symbol tables)");
// NOTE(review): the message below is missing its closing parenthesis.
446 return Error("unexpected input object (unexpected load command");
// NOTE(review): the next two returns carry the same message; one of them
// presumably guards the segment load command rather than the symbol table -
// confirm and reword.
451 return Error("no symbol table found in object");
453 return Error("no symbol table found in object");
455 // Read and register the symbol table data.
456 InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
457 Obj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC);
459 return Error("unable to load symbol table load command");
460 Obj->RegisterStringTable(*SymtabLC);
462 // Read the dynamic link-edit information, if present (not present in static
// objects).
465 InMemoryStruct<macho::DysymtabLoadCommand> DysymtabLC;
466 Obj->ReadDysymtabLoadCommand(*DysymtabLCI, DysymtabLC);
// NOTE(review): "link-exit" in the message below looks like a typo for
// "link-edit", the term used in the comment above.
468 return Error("unable to load dynamic link-exit load command");
470 // FIXME: We don't support anything interesting yet.
471 // if (DysymtabLC->LocalSymbolsIndex != 0)
472 // return Error("NOT YET IMPLEMENTED: local symbol entries");
473 // if (DysymtabLC->ExternalSymbolsIndex != 0)
474 // return Error("NOT YET IMPLEMENTED: non-external symbol entries");
475 // if (DysymtabLC->UndefinedSymbolsIndex != SymtabLC->NumSymbolTableEntries)
476 // return Error("NOT YET IMPLEMENTED: undefined symbol entries");
479 // Load the segment load command.
// The command type selects the 32-bit or the 64-bit loader.
480 if (SegmentLCI->Command.Type == macho::LCT_Segment) {
481 if (loadSegment32(Obj.get(), SegmentLCI, SymtabLC))
484 if (loadSegment64(Obj.get(), SegmentLCI, SymtabLC))
492 //===----------------------------------------------------------------------===//
493 // RuntimeDyld class implementation
// Construct the public facade and allocate the implementation object that
// the other member functions forward to.
494 RuntimeDyld::RuntimeDyld() {
495 Dyld = new RuntimeDyldImpl;
// Destructor.
// NOTE(review): the constructor allocates Dyld with new, so this is
// presumably where it is released - confirm.
498 RuntimeDyld::~RuntimeDyld() {
// Load the object held in InputBuffer; forwards to the implementation and
// returns its result unchanged.
502 bool RuntimeDyld::loadObject(MemoryBuffer *InputBuffer) {
503 return Dyld->loadObject(InputBuffer);
// Return the address recorded for Name; forwards to the implementation's
// symbol table lookup.
506 void *RuntimeDyld::getSymbolAddress(StringRef Name) {
507 return Dyld->getSymbolAddress(Name);
// Return the memory block that holds the loaded code; forwards to the
// implementation.
510 sys::MemoryBlock RuntimeDyld::getMemoryBlock() {
511 return Dyld->getMemoryBlock();
// Return the message recorded by the implementation's error state.
514 StringRef RuntimeDyld::getErrorString() {
515 return Dyld->getErrorString();
518 } // end namespace llvm