1 //===- Archive.cpp - ar File Format implementation --------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the Archive class and its member header, child and symbol helpers.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Object/Archive.h"
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Support/Endian.h"
19 #include "llvm/Support/MemoryBuffer.h"
20 #include "llvm/Support/Path.h"
23 using namespace object;
24 using namespace llvm::support::endian;
// Eight-byte signatures that the Archive constructor tests the start of the
// buffer against: Magic for the "!<arch>" form and ThinMagic for the
// "!<thin>" form. child_begin() skips strlen(Magic) bytes to reach the first
// member header.
26 static const char *const Magic = "!<arch>\n";
27 static const char *const ThinMagic = "!<thin>\n";
// Out-of-line definition of Archive::anchor() with an empty body.
// NOTE(review): presumably the usual out-of-line virtual method used to pin
// the class's vtable to this file — confirm against the class declaration.
29 void Archive::anchor() { }
// Returns the member name held in the fixed-width Name field of the header,
// truncated just before the first occurrence of the terminator EndCond.
// NOTE(review): the terminator chosen for names that begin with '/' or '#'
// versus ordinary names is selected in the branch below — confirm which
// character each case uses.
31 StringRef ArchiveMemberHeader::getName() const {
33 if (Name[0] == '/' || Name[0] == '#')
37 llvm::StringRef::size_type end =
38 llvm::StringRef(Name, sizeof(Name)).find(EndCond);
39 if (end == llvm::StringRef::npos)
// The result must be non-empty and must not run past the Name field.
41 assert(end <= sizeof(Name) && end > 0);
42 // Don't include the EndCond if there is one.
43 return llvm::StringRef(Name, end);
// Parses the Size header field as a base-10 integer after stripping trailing
// spaces. Yields object_error::parse_failed when the field is not a valid
// decimal number.
46 ErrorOr<uint32_t> ArchiveMemberHeader::getSize() const {
48 if (llvm::StringRef(Size, sizeof(Size)).rtrim(" ").getAsInteger(10, Ret))
49 return object_error::parse_failed; // Size is not a decimal number.
// Parses the AccessMode header field as a base-8 integer after stripping
// trailing spaces and returns it converted to sys::fs::perms.
// A field that is not valid octal is treated as unreachable rather than
// reported as an error to the caller.
53 sys::fs::perms ArchiveMemberHeader::getAccessMode() const {
55 if (StringRef(AccessMode, sizeof(AccessMode)).rtrim(" ").getAsInteger(8, Ret))
56 llvm_unreachable("Access mode is not an octal number.");
57 return static_cast<sys::fs::perms>(Ret);
// Parses the LastModified header field as a base-10 integer (trailing spaces
// stripped) into Seconds and converts it with TimeValue::fromEpochTime.
// A field that is not a decimal number is treated as unreachable rather than
// reported as an error to the caller.
60 sys::TimeValue ArchiveMemberHeader::getLastModified() const {
62 if (StringRef(LastModified, sizeof(LastModified)).rtrim(" ")
63 .getAsInteger(10, Seconds))
64 llvm_unreachable("Last modified time not a decimal number.");
67 Ret.fromEpochTime(Seconds);
// Parses the UID header field as a base-10 integer after stripping trailing
// spaces. A field that is not a decimal number is treated as unreachable.
// NOTE(review): the diagnostic text says "UID time"; the word "time" looks
// like a leftover from getLastModified().
71 unsigned ArchiveMemberHeader::getUID() const {
73 if (StringRef(UID, sizeof(UID)).rtrim(" ").getAsInteger(10, Ret))
74 llvm_unreachable("UID time not a decimal number.");
// Parses the GID header field as a base-10 integer after stripping trailing
// spaces. A field that is not a decimal number is treated as unreachable.
// NOTE(review): the diagnostic text says "GID time"; the word "time" looks
// like a leftover from getLastModified().
78 unsigned ArchiveMemberHeader::getGID() const {
80 if (StringRef(GID, sizeof(GID)).rtrim(" ").getAsInteger(10, Ret))
81 llvm_unreachable("GID time not a decimal number.");
// Builds a Child describing the archive member whose header begins at Start.
//
// Data first covers only the ArchiveMemberHeader. For members that are not
// thin members it is then widened by the raw size read from the header, so
// that it spans header plus payload. StartOfFile is set to the first byte
// after the header and, for "#1/<n>" names, additionally skips the <n> name
// bytes stored in front of the payload.
//
// Parse failures are reported by writing to *EC; the size-failure path
// asserts that EC is non-null.
85 Archive::Child::Child(const Archive *Parent, const char *Start,
91 uint64_t Size = sizeof(ArchiveMemberHeader);
92 Data = StringRef(Start, Size);
93 if (!isThinMember()) {
94 ErrorOr<uint64_t> MemberSize = getRawSize();
95 if (MemberSize.getError()) {
96 assert (EC && "Error must be caught");
97 *EC = MemberSize.getError();
100 Size += MemberSize.get();
101 Data = StringRef(Start, Size);
104 // Setup StartOfFile and PaddingBytes.
105 StartOfFile = sizeof(ArchiveMemberHeader);
106 // Don't include attached name.
107 StringRef Name = getRawName();
108 if (Name.startswith("#1/")) {
110 if (Name.substr(3).rtrim(" ").getAsInteger(10, NameSize)) {
112 *EC = object_error::parse_failed; // Long name offset is not an integer.
115 StartOfFile += NameSize;
// Factory wrapper around the Child constructor: allocates a Child for the
// member at Start, passing &EC so the constructor can report parse failures,
// and hands ownership back through a std::unique_ptr.
// NOTE(review): presumably EC is checked and returned before Ret — confirm.
119 ErrorOr<std::unique_ptr<Archive::Child>> Archive::Child::create(
120 const Archive *Parent, const char *Start) {
122 std::unique_ptr<Archive::Child> Ret(new Archive::Child(Parent, Start, &EC));
125 return std::move(Ret);
// Returns the size of the member's contents.
// For a thin parent archive the size is read from the member header; in the
// other case it is the length of Data minus the StartOfFile offset, i.e.
// Data without the header and any inline name.
128 ErrorOr<uint64_t> Archive::Child::getSize() const {
129 if (Parent->IsThin) {
130 ErrorOr<uint32_t> Size = getHeader()->getSize();
131 if (std::error_code EC = Size.getError())
135 return Data.size() - StartOfFile;
// Reads the size recorded in the member header via
// ArchiveMemberHeader::getSize() and checks it for a parse error.
// NOTE(review): presumably the error is propagated and the parsed value
// returned unchanged otherwise — confirm.
138 ErrorOr<uint64_t> Archive::Child::getRawSize() const {
139 ErrorOr<uint32_t> Size = getHeader()->getSize();
140 if (std::error_code EC = Size.getError())
// True when the parent archive is thin and this member's header name is
// neither "/" nor "//". Those two names are the special members that the
// Archive constructor treats as symbol table and string table.
145 bool Archive::Child::isThinMember() const {
146 StringRef Name = getHeader()->getName();
147 return Parent->IsThin && Name != "/" && Name != "//";
// Returns the bytes of this member's contents.
//
// Members that are not thin members: a view into Data starting at
// StartOfFile with the length reported by getSize().
// Thin members: the member name is appended to the parent directory of the
// archive's buffer identifier, that file is loaded with
// MemoryBuffer::getFile, the buffer is stored in Parent->ThinBuffers, and a
// view of the stored buffer is returned.
//
// NOTE(review): getSize() returns ErrorOr<uint64_t> but the result is held
// in ErrorOr<uint32_t> here, which narrows sizes above 32 bits — confirm
// whether that is intended.
150 ErrorOr<StringRef> Archive::Child::getBuffer() const {
151 if (!isThinMember()) {
152 ErrorOr<uint32_t> Size = getSize();
153 if (std::error_code EC = Size.getError())
155 return StringRef(Data.data() + StartOfFile, Size.get());
157 ErrorOr<StringRef> Name = getName();
158 if (std::error_code EC = Name.getError())
160 SmallString<128> FullName = sys::path::parent_path(
161 Parent->getMemoryBufferRef().getBufferIdentifier());
162 sys::path::append(FullName, *Name);
163 ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
164 if (std::error_code EC = Buf.getError())
166 Parent->ThinBuffers.push_back(std::move(*Buf));
167 return Parent->ThinBuffers.back()->getBuffer();
// Returns the member that follows this one in the archive.
//
// The next header is expected at the end of this member's Data plus Pad
// padding bytes. If that position is the end of the archive buffer (or Pad
// bytes before it), a Child built from a null start pointer is returned;
// child_end() builds its sentinel from a null start as well. A position
// beyond the end of the buffer yields object_error::parse_failed.
// In every other case a Child is created at that position and returned by
// value.
170 ErrorOr<Archive::Child> Archive::Child::getNext() const {
171 size_t SpaceToSkip = Data.size();
172 // If it's odd, add 1 to make it even.
177 const char *NextLoc = Data.data() + SpaceToSkip + Pad;
179 // Check to see if this is at the end of the archive.
180 if (NextLoc == Parent->Data.getBufferEnd() ||
181 NextLoc == Parent->Data.getBufferEnd() - Pad )
182 return Child(Parent, nullptr, nullptr);
184 // Check to see if this is past the end of the archive.
185 if (NextLoc > Parent->Data.getBufferEnd())
186 return object_error::parse_failed;
188 auto ChildOrErr = Child::create(Parent, NextLoc);
189 if (std::error_code EC = ChildOrErr.getError())
191 return std::move(*ChildOrErr.get());
// Computes this member's byte offset within the archive: the distance from
// the start of the parent's buffer to the start of this member's Data.
194 uint64_t Archive::Child::getChildOffset() const {
195 const char *a = Parent->Data.getBuffer().data();
196 const char *c = Data.data();
197 uint64_t offset = c - a;
// Resolves the member's name from the raw header name.
//
// Three encodings are handled:
//  * Raw names starting with '/': "/" (linker member) and "//" (string
//    table) are special names; any other value is "/<offset>", a decimal
//    offset into the archive's string table member. The resolved address is
//    range-checked against the string table. For K_GNU and K_MIPS64 archives
//    the name ends one character before the next '\n'; for other kinds the
//    name runs to the first NUL.
//  * Raw names starting with "#1/": the decimal number after the prefix is
//    the length of the name stored right after the header in Data; trailing
//    NUL bytes are stripped.
//  * Anything else is a simple name; one trailing '/' is removed if present.
//
// Returns object_error::parse_failed when a numeric part does not parse or
// the string table address is out of range.
//
// NOTE(review): Parent->StringTable is dereferenced to compute addr and Size
// before it is compared against child_end() — confirm that ordering is safe.
// NOTE(review): the result of find('\n') is used as End - 1 without an npos
// check — confirm malformed string tables cannot reach this point.
201 ErrorOr<StringRef> Archive::Child::getName() const {
202 StringRef name = getRawName();
203 // Check if it's a special name.
204 if (name[0] == '/') {
205 if (name.size() == 1) // Linker member.
207 if (name.size() == 2 && name[1] == '/') // String table.
212 if (name.substr(1).rtrim(" ").getAsInteger(10, offset))
213 return object_error::parse_failed; // Long name offset is not an integer.
214 // Check for bad stringtable iterator.
215 if (std::error_code EC = Parent->StringTable->getError())
217 const char *addr = (*Parent->StringTable)->Data.begin()
218 + sizeof(ArchiveMemberHeader)
221 auto Size = (*Parent->StringTable)->getSize();
222 if (std::error_code EC = Size.getError())
224 if (Parent->StringTable == Parent->child_end()
225 || addr < ((*Parent->StringTable)->Data.begin()
226 + sizeof(ArchiveMemberHeader))
227 || addr > ((*Parent->StringTable)->Data.begin()
228 + sizeof(ArchiveMemberHeader)
230 return object_error::parse_failed;
232 // GNU long file names end with a "/\n".
233 if (Parent->kind() == K_GNU || Parent->kind() == K_MIPS64) {
234 StringRef::size_type End = StringRef(addr).find('\n');
235 return StringRef(addr, End - 1);
237 return StringRef(addr);
238 } else if (name.startswith("#1/")) {
240 if (name.substr(3).rtrim(" ").getAsInteger(10, name_size))
241 return object_error::parse_failed; // Long name offset is not an integer.
242 return Data.substr(sizeof(ArchiveMemberHeader), name_size)
243 .rtrim(StringRef("\0", 1));
245 // It's a simple name.
246 if (name[name.size() - 1] == '/')
247 return name.substr(0, name.size() - 1);
// Pairs the member's contents (getBuffer()) with its resolved name
// (getName()) in a MemoryBufferRef. The name is resolved first; both calls
// are checked for errors before the reference is built.
251 ErrorOr<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
252 ErrorOr<StringRef> NameOrErr = getName();
253 if (std::error_code EC = NameOrErr.getError())
255 StringRef Name = NameOrErr.get();
256 ErrorOr<StringRef> Buf = getBuffer();
257 if (std::error_code EC = Buf.getError())
259 return MemoryBufferRef(*Buf, Name);
// Obtains the member's MemoryBufferRef and passes it, together with Context,
// to createBinary(), returning whatever that call produces.
262 ErrorOr<std::unique_ptr<Binary>>
263 Archive::Child::getAsBinary(LLVMContext *Context) const {
264 ErrorOr<MemoryBufferRef> BuffOrErr = getMemoryBufferRef();
265 if (std::error_code EC = BuffOrErr.getError())
268 return createBinary(BuffOrErr.get(), Context);
// Factory wrapper around the Archive constructor: builds an Archive over
// Source, with EC receiving the constructor's status, and returns ownership
// through a std::unique_ptr.
// NOTE(review): presumably EC is checked and returned before Ret — confirm.
271 ErrorOr<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
273 std::unique_ptr<Archive> Ret(new Archive(Source, EC));
276 return std::move(Ret);
// Constructs an Archive over Source and classifies its format.
//
// SymbolTable, StringTable and FirstRegular all start out as child_end().
// The buffer must begin with ThinMagic or Magic; any other prefix sets ec to
// object_error::invalid_file_type. The leading members are then inspected by
// raw name, following the pattern described in the comment below, to decide
// the archive kind (the assignments shown here choose between K_MIPS64 and
// K_GNU depending on whether a "/SYM64/" member was seen).
//
// ec is reset to std::error_code() on the successful paths and set to
// object_error::parse_failed when the leading members do not match any
// recognised layout.
279 Archive::Archive(MemoryBufferRef Source, std::error_code &ec)
280 : Binary(Binary::ID_Archive, Source), SymbolTable(child_end()),
281 StringTable(child_end()), FirstRegular(child_end()) {
282 StringRef Buffer = Data.getBuffer();
283 // Check for sufficient magic.
284 if (Buffer.startswith(ThinMagic)) {
286 } else if (Buffer.startswith(Magic)) {
289 ec = object_error::invalid_file_type;
293 // Get the special members.
294 child_iterator i = child_begin(false);
295 child_iterator e = child_end();
302 StringRef Name = (*i)->getRawName();
304 // Below is the pattern that is used to figure out the archive format
305 // GNU archive format
306 // First member : / (may exist, if it exists, points to the symbol table )
307 // Second member : // (may exist, if it exists, points to the string table)
308 // Note : The string table is used if the filename exceeds 15 characters
309 // BSD archive format
310 // First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table)
311 // There is no string table, if the filename exceeds 15 characters or has an
312 // embedded space, the filename has #1/<size>. The size represents the size
313 // of the filename that needs to be read after the archive header
314 // COFF archive format
316 // Second member : / (provides a directory of symbols)
317 // Third member : // (may exist, if it exists, contains the string table)
318 // Note: Microsoft PE/COFF Spec 8.3 says that the third member is present
319 // even if the string table is empty. However, lib.exe does not in fact
320 // seem to create the third member if there's no member whose filename
321 // exceeds 15 characters. So the third member is optional.
323 if (Name == "__.SYMDEF") {
333 ec = std::error_code();
337 if (Name.startswith("#1/")) {
339 // We know this is BSD, so getName will work since there is no string table.
340 ErrorOr<StringRef> NameOrErr = (*i)->getName();
341 ec = NameOrErr.getError();
344 Name = NameOrErr.get();
345 if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") {
357 // MIPS 64-bit ELF archives use a special format of a symbol table.
358 // This format is marked by `ar_name` field equals to "/SYM64/".
359 // For detailed description see page 96 in the following document:
360 // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf
362 bool has64SymTable = false;
363 if (Name == "/" || Name == "/SYM64/") {
365 if (Name == "/SYM64/")
366 has64SymTable = true;
373 Name = (*i)->getRawName();
377 Format = has64SymTable ? K_MIPS64 : K_GNU;
385 ec = std::error_code();
389 if (Name[0] != '/') {
390 Format = has64SymTable ? K_MIPS64 : K_GNU;
392 ec = std::error_code();
397 ec = object_error::parse_failed;
411 ec = std::error_code();
415 Name = (*i)->getRawName();
427 ec = std::error_code();
// Returns an iterator to the first member of the archive.
//
// A buffer of exactly 8 bytes (just the magic string) is an empty archive.
// For any larger buffer the first member header is located right after the
// magic;
// strlen(Magic) is used for both archive flavours, which works because
// Magic and ThinMagic have the same length. If creating the Child fails the
// iterator is constructed from the error code instead.
// NOTE(review): how SkipInternal influences the result is not documented
// here — confirm against the full function body.
430 Archive::child_iterator Archive::child_begin(bool SkipInternal) const {
431 if (Data.getBufferSize() == 8) // empty archive.
437 const char *Loc = Data.getBufferStart() + strlen(Magic);
438 auto ChildOrErr = Child::create(this, Loc);
439 if (std::error_code EC = ChildOrErr.getError())
440 return child_iterator(EC);
441 Child c = *(ChildOrErr.get());
442 return child_iterator(c);
// Returns the past-the-end iterator: an iterator wrapping a Child created
// with a null start pointer. Creation is not expected to fail for a null
// start, so an error here is treated as unreachable.
445 Archive::child_iterator Archive::child_end() const {
446 // This with a second argument of nullptr can't return an Error.
447 auto ChildOrErr = Child::create(this, nullptr);
448 if (ChildOrErr.getError())
449 llvm_unreachable("Can't create Archive::child_end().");
450 Child c = *(ChildOrErr.get());
451 return child_iterator(c);
// Returns the symbol's name: the string located StringIndex bytes into the
// parent's symbol table. The StringRef is built from a const char *, so the
// name extends up to the first NUL byte.
454 StringRef Archive::Symbol::getName() const {
455 return Parent->getSymbolTable().begin() + StringIndex;
// Finds the archive member that defines this symbol.
//
// The member's offset into the archive is read from the symbol table, with a
// layout that depends on the archive kind:
//  * K_GNU:    32-bit big-endian offsets, indexed by SymbolIndex.
//  * K_MIPS64: 64-bit big-endian offsets, indexed by SymbolIndex.
//  * K_BSD:    8-byte ranlib records; the second 32-bit little-endian word
//              of record SymbolIndex is the member offset.
//  * remaining case (the little-endian member-offset / symbol-index tables
//    described for COFF in the Archive constructor): a 32-bit member count
//    and offset table, then a 32-bit symbol count and a table of 16-bit
//    indices into the offset table. Out-of-range indices give
//    object_error::parse_failed.
// Offsets initially skips the leading count word (8 bytes for K_MIPS64,
// 4 bytes in the other case).
//
// A Child is then created at Parent->getData().begin() + Offset and wrapped
// in a child_iterator.
458 ErrorOr<Archive::child_iterator> Archive::Symbol::getMember() const {
459 const char *Buf = Parent->getSymbolTable().begin();
460 const char *Offsets = Buf;
461 if (Parent->kind() == K_MIPS64)
462 Offsets += sizeof(uint64_t);
464 Offsets += sizeof(uint32_t);
466 if (Parent->kind() == K_GNU) {
467 Offset = read32be(Offsets + SymbolIndex * 4);
468 } else if (Parent->kind() == K_MIPS64) {
469 Offset = read64be(Offsets + SymbolIndex * 8);
470 } else if (Parent->kind() == K_BSD) {
471 // The SymbolIndex is an index into the ranlib structs that start at
472 // Offsets (the first uint32_t is the number of bytes of the ranlib
473 // structs). The ranlib structs are a pair of uint32_t's the first
474 // being a string table offset and the second being the offset into
475 // the archive of the member that defines the symbol. Which is what
477 Offset = read32le(Offsets + SymbolIndex * 8 + 4);
480 uint32_t MemberCount = read32le(Buf);
481 Buf += MemberCount * 4 + 4;
483 uint32_t SymbolCount = read32le(Buf);
484 if (SymbolIndex >= SymbolCount)
485 return object_error::parse_failed;
487 // Skip SymbolCount to get to the indices table.
488 const char *Indices = Buf + 4;
490 // Get the index of the offset in the file member offset table for this
492 uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2);
493 // Subtract 1 since OffsetIndex is 1 based.
496 if (OffsetIndex >= MemberCount)
497 return object_error::parse_failed;
499 Offset = read32le(Offsets + OffsetIndex * 4);
502 const char *Loc = Parent->getData().begin() + Offset;
503 auto ChildOrErr = Child::create(Parent, Loc);
504 if (std::error_code EC = ChildOrErr.getError())
506 child_iterator Iter(std::move(*ChildOrErr.get()));
// Produces the Symbol that follows this one in the symbol table.
//
// For K_BSD archives the string offset of the next symbol comes from the
// next ranlib record: the current record's string offset is subtracted from
// StringIndex and the next record's is added. This is skipped when the next
// index would be past the record count. For other kinds the names are
// stored back to back, so StringIndex moves to one past the next NUL byte.
510 Archive::Symbol Archive::Symbol::getNext() const {
512 if (Parent->kind() == K_BSD) {
513 // t.StringIndex is an offset from the start of the __.SYMDEF or
514 // "__.SYMDEF SORTED" member into the string table for the ranlib
515 // struct indexed by t.SymbolIndex . To change t.StringIndex to the
516 // offset in the string table for t.SymbolIndex+1 we subtract
517 // its offset from the start of the string table for t.SymbolIndex
518 // and add the offset of the string table for t.SymbolIndex+1.
520 // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
521 // which is the number of bytes of ranlib structs that follow. The ranlib
522 // structs are a pair of uint32_t's the first being a string table offset
523 // and the second being the offset into the archive of the member that
524 // define the symbol. After that the next uint32_t is the byte count of
525 // the string table followed by the string table.
526 const char *Buf = Parent->getSymbolTable().begin();
527 uint32_t RanlibCount = 0;
528 RanlibCount = read32le(Buf) / 8;
529 // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount)
530 // don't change the t.StringIndex as we don't want to reference a ranlib
532 if (t.SymbolIndex + 1 < RanlibCount) {
533 const char *Ranlibs = Buf + 4;
534 uint32_t CurRanStrx = 0;
535 uint32_t NextRanStrx = 0;
536 CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8);
537 NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8);
538 t.StringIndex -= CurRanStrx;
539 t.StringIndex += NextRanStrx;
542 // Go to one past next null.
543 t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1;
// Returns an iterator to the first symbol in the archive's symbol table.
//
// Without a symbol table the result is Symbol(this, 0, 0). With one, buf is
// advanced past the format-specific tables that precede the symbol names,
// and the distance travelled becomes the first symbol's string offset:
//  * K_GNU:    32-bit big-endian count plus that many 32-bit offsets.
//  * K_MIPS64: 64-bit big-endian count plus that many 64-bit offsets.
//  * K_BSD:    32-bit little-endian byte count of ranlib records, the
//              records themselves, and the string table's byte-count word.
//  * remaining case: member count and 32-bit offsets, then symbol count and
//    16-bit indices (all little-endian).
549 Archive::symbol_iterator Archive::symbol_begin() const {
550 if (!hasSymbolTable())
551 return symbol_iterator(Symbol(this, 0, 0));
553 const char *buf = getSymbolTable().begin();
554 if (kind() == K_GNU) {
555 uint32_t symbol_count = 0;
556 symbol_count = read32be(buf);
557 buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
558 } else if (kind() == K_MIPS64) {
559 uint64_t symbol_count = read64be(buf);
560 buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t)));
561 } else if (kind() == K_BSD) {
562 // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
563 // which is the number of bytes of ranlib structs that follow. The ranlib
564 // structs are a pair of uint32_t's the first being a string table offset
565 // and the second being the offset into the archive of the member that
566 // define the symbol. After that the next uint32_t is the byte count of
567 // the string table followed by the string table.
568 uint32_t ranlib_count = 0;
569 ranlib_count = read32le(buf) / 8;
570 const char *ranlibs = buf + 4;
571 uint32_t ran_strx = 0;
572 ran_strx = read32le(ranlibs);
573 buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t))));
574 // Skip the byte count of the string table.
575 buf += sizeof(uint32_t);
578 uint32_t member_count = 0;
579 uint32_t symbol_count = 0;
580 member_count = read32le(buf);
581 buf += 4 + (member_count * 4); // Skip offsets.
582 symbol_count = read32le(buf);
583 buf += 4 + (symbol_count * 2); // Skip indices.
585 uint32_t string_start_offset = buf - getSymbolTable().begin();
586 return symbol_iterator(Symbol(this, 0, string_start_offset));
// Returns the past-the-end symbol iterator: a Symbol whose index equals
// getNumberOfSymbols() and whose string offset is 0.
589 Archive::symbol_iterator Archive::symbol_end() const {
590 return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0));
// Returns the number of entries in the symbol table, read from the table's
// header in the layout of the archive kind: a big-endian 32-bit count, a
// big-endian 64-bit count for K_MIPS64, a little-endian byte count divided
// by the 8-byte record size, or the little-endian symbol count that follows
// the member offset table.
// NOTE(review): the K_MIPS64 count is read as 64 bits but returned as
// uint32_t, so larger counts would be truncated — confirm this is acceptable.
593 uint32_t Archive::getNumberOfSymbols() const {
594 if (!hasSymbolTable())
596 const char *buf = getSymbolTable().begin();
598 return read32be(buf);
599 if (kind() == K_MIPS64)
600 return read64be(buf);
602 return read32le(buf) / 8;
603 uint32_t member_count = 0;
604 member_count = read32le(buf);
605 buf += 4 + (member_count * 4); // Skip offsets.
606 return read32le(buf);
// Searches the symbol table linearly for a symbol whose name equals `name`
// and returns an iterator to the member that defines it.
// An error from Symbol::getMember() is not passed on to the caller (see the
// FIXME below).
609 Archive::child_iterator Archive::findSym(StringRef name) const {
610 Archive::symbol_iterator bs = symbol_begin();
611 Archive::symbol_iterator es = symbol_end();
613 for (; bs != es; ++bs) {
614 StringRef SymName = bs->getName();
615 if (SymName == name) {
616 ErrorOr<Archive::child_iterator> ResultOrErr = bs->getMember();
617 // FIXME: Should we really eat the error?
618 if (ResultOrErr.getError())
620 return ResultOrErr.get();
// True when a symbol table member was found, i.e. SymbolTable no longer
// equals the child_end() value it is initialised with in the constructor.
626 bool Archive::hasSymbolTable() const {
627 return SymbolTable != child_end();