1 //===- Archive.cpp - ar File Format implementation --------------*- C++ -*-===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the Archive class together with its
// ArchiveMemberHeader, Archive::Child and Archive::Symbol helpers.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Object/Archive.h"
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Support/Endian.h"
19 #include "llvm/Support/MemoryBuffer.h"
22 using namespace object;
// Leading bytes of a regular archive. Archive's constructor accepts a buffer
// that starts with this string, and child_begin() skips strlen(Magic) bytes to
// reach the first member.
24 static const char *const Magic = "!<arch>\n";
// Leading bytes of the "!<thin>" archive variant; Archive's constructor tests
// for this string before it tests for Magic.
25 static const char *const ThinMagic = "!<thin>\n";
// Out-of-line definition with an intentionally empty body.
// NOTE(review): presumably this exists to pin Archive's vtable to this
// translation unit -- confirm against the class declaration.
27 void Archive::anchor() { }
// Returns the member name held in the fixed-width Name field, ending just
// before the first terminator character (EndCond) found inside the field.
// The search is bounded by sizeof(Name), so the field does not need to be
// NUL-terminated.
// NOTE(review): the declaration and assignment of EndCond, and the statement
// guarded by the npos check, are not visible in this extract; presumably the
// Name[0] test picks the terminator and a missing terminator means the whole
// field is the name -- confirm against the full file.
29 StringRef ArchiveMemberHeader::getName() const {
31 if (Name[0] == '/' || Name[0] == '#')
35 llvm::StringRef::size_type end =
36 llvm::StringRef(Name, sizeof(Name)).find(EndCond);
37 if (end == llvm::StringRef::npos)
39 assert(end <= sizeof(Name) && end > 0);
40 // Don't include the EndCond if there is one.
41 return llvm::StringRef(Name, end);
// Parses the fixed-width Size field as a base-10 integer after trimming its
// trailing spaces. A field that does not parse is treated as a can't-happen
// condition (llvm_unreachable); no error is reported to the caller.
// NOTE(review): the declaration of Ret and the return statement are not
// visible in this extract.
44 uint32_t ArchiveMemberHeader::getSize() const {
46 if (llvm::StringRef(Size, sizeof(Size)).rtrim(" ").getAsInteger(10, Ret))
47 llvm_unreachable("Size is not a decimal number.");
// Parses the fixed-width AccessMode field as a base-8 integer (trailing spaces
// trimmed) and returns it converted to sys::fs::perms. A field that does not
// parse is treated as a can't-happen condition (llvm_unreachable).
// NOTE(review): the declaration of Ret is not visible in this extract.
51 sys::fs::perms ArchiveMemberHeader::getAccessMode() const {
53 if (StringRef(AccessMode, sizeof(AccessMode)).rtrim(" ").getAsInteger(8, Ret))
54 llvm_unreachable("Access mode is not an octal number.");
55 return static_cast<sys::fs::perms>(Ret);
// Parses the fixed-width LastModified field as a base-10 count of seconds
// (trailing spaces trimmed) and feeds it to sys::TimeValue::fromEpochTime.
// A field that does not parse is treated as a can't-happen condition
// (llvm_unreachable).
// NOTE(review): the declarations of Seconds and Ret and the return statement
// are not visible in this extract.
58 sys::TimeValue ArchiveMemberHeader::getLastModified() const {
60 if (StringRef(LastModified, sizeof(LastModified)).rtrim(" ")
61 .getAsInteger(10, Seconds))
62 llvm_unreachable("Last modified time not a decimal number.");
65 Ret.fromEpochTime(Seconds);
// Parses the fixed-width UID field as a base-10 integer after trimming its
// trailing spaces. A field that does not parse is treated as a can't-happen
// condition (llvm_unreachable).
// NOTE(review): the diagnostic text says "UID time", which looks copied from
// getLastModified(); the word "time" is probably unintended.
// NOTE(review): the declaration of Ret and the return statement are not
// visible in this extract.
69 unsigned ArchiveMemberHeader::getUID() const {
71 if (StringRef(UID, sizeof(UID)).rtrim(" ").getAsInteger(10, Ret))
72 llvm_unreachable("UID time not a decimal number.");
// Parses the fixed-width GID field as a base-10 integer after trimming its
// trailing spaces. A field that does not parse is treated as a can't-happen
// condition (llvm_unreachable).
// NOTE(review): the diagnostic text says "GID time", which looks copied from
// getLastModified(); the word "time" is probably unintended.
// NOTE(review): the declaration of Ret and the return statement are not
// visible in this extract.
76 unsigned ArchiveMemberHeader::getGID() const {
78 if (StringRef(GID, sizeof(GID)).rtrim(" ").getAsInteger(10, Ret))
79 llvm_unreachable("GID time not a decimal number.");
// Builds a Child that views the archive member whose header begins at Start.
//
// Data always covers the member header. The size recorded in the header is
// added to it unless the parent is a thin archive, except that the members
// named "/" and "//" get their contents included even in a thin archive.
// StartOfFile is the offset within Data at which the member's contents begin:
// past the header and, for "#1/<len>" names, past the <len> name bytes that
// follow the header.
//
// NOTE(review): the member-initializer list and the handling of a null Start
// (which child_end() and getNext() pass) are not visible in this extract.
83 Archive::Child::Child(const Archive *Parent, const char *Start)
88 const ArchiveMemberHeader *Header =
89 reinterpret_cast<const ArchiveMemberHeader *>(Start);
90 uint64_t Size = sizeof(ArchiveMemberHeader);
91 if (!Parent->IsThin || Header->getName() == "/" || Header->getName() == "//")
92 Size += Header->getSize();
93 Data = StringRef(Start, Size);
95 // Setup StartOfFile and PaddingBytes.
96 StartOfFile = sizeof(ArchiveMemberHeader);
97 // Don't include attached name.
98 StringRef Name = Header->getName();
99 if (Name.startswith("#1/")) {
// The digits after "#1/" give the length of the name stored after the header.
101 if (Name.substr(3).rtrim(" ").getAsInteger(10, NameSize))
102 llvm_unreachable("Long name length is not an integer");
103 StartOfFile += NameSize;
// Returns the size of the member's contents: either the size recorded in the
// member header, or the length of Data that remains after StartOfFile.
// NOTE(review): the condition that selects the header-recorded size is not
// visible in this extract; presumably it is the thin-archive case, where the
// constructor leaves the contents out of Data -- confirm against the full file.
107 uint64_t Archive::Child::getSize() const {
109 return getHeader()->getSize();
110 return Data.size() - StartOfFile;
// Returns the child that follows this one in the parent archive. When the
// computed location is at or beyond the end of the parent's buffer, a Child
// built from a null start pointer is returned instead (the same value that
// Archive::child_end() produces).
// NOTE(review): the statement that performs the odd-to-even rounding described
// below is not visible in this extract.
113 Archive::Child Archive::Child::getNext() const {
114 size_t SpaceToSkip = Data.size();
115 // If it's odd, add 1 to make it even.
119 const char *NextLoc = Data.data() + SpaceToSkip;
121 // Check to see if this is at or past the end of the archive.
122 if (NextLoc >= Parent->Data.getBufferEnd())
123 return Child(Parent, nullptr);
125 return Child(Parent, NextLoc);
// Resolves the member's name from its raw header name.
//
// Three encodings are handled:
//   * names starting with '/': "/" and "//" are special members; any other
//     form carries a decimal number after the slash that is used to locate the
//     real name inside the parent's string table member;
//   * names starting with "#1/": the decimal number is the length of a name
//     stored right after the member header, with trailing NULs stripped;
//   * anything else is a simple name, returned without a trailing '/'.
//
// Returns object_error::parse_failed when the parent has no string table or
// the computed address lies outside the string table's contents.
//
// NOTE(review): several statements are not visible in this extract (the
// returns for the two special members, the declarations of offset and
// name_size, the tail of the addr expression, and the final return).
// NOTE(review): addr is computed from Parent->StringTable before the
// comparison against child_end(), so the string table iterator is
// dereferenced before it is known to be valid.
// NOTE(review): the "Long name length" diagnostic misspells "integer".
128 ErrorOr<StringRef> Archive::Child::getName() const {
129 StringRef name = getRawName();
130 // Check if it's a special name.
131 if (name[0] == '/') {
132 if (name.size() == 1) // Linker member.
134 if (name.size() == 2 && name[1] == '/') // String table.
// Otherwise the text after the '/' must be a decimal number.
139 if (name.substr(1).rtrim(" ").getAsInteger(10, offset))
140 llvm_unreachable("Long name offset is not an integer");
141 const char *addr = Parent->StringTable->Data.begin()
142 + sizeof(ArchiveMemberHeader)
// Reject the name if there is no string table, or if addr falls before the
// start or after the end of the string table's contents.
145 if (Parent->StringTable == Parent->child_end()
146 || addr < (Parent->StringTable->Data.begin()
147 + sizeof(ArchiveMemberHeader))
148 || addr > (Parent->StringTable->Data.begin()
149 + sizeof(ArchiveMemberHeader)
150 + Parent->StringTable->getSize()))
151 return object_error::parse_failed;
153 // GNU long file names end with a /.
154 if (Parent->kind() == K_GNU) {
155 StringRef::size_type End = StringRef(addr).find('/');
156 return StringRef(addr, End);
// Other kinds: the name runs up to the first NUL at or after addr.
158 return StringRef(addr);
159 } else if (name.startswith("#1/")) {
161 if (name.substr(3).rtrim(" ").getAsInteger(10, name_size))
162 llvm_unreachable("Long name length is not an ingeter");
163 return Data.substr(sizeof(ArchiveMemberHeader), name_size)
164 .rtrim(StringRef("\0", 1));
166 // It's a simple name.
167 if (name[name.size() - 1] == '/')
168 return name.substr(0, name.size() - 1);
// Returns a MemoryBufferRef pairing this member's buffer (getBuffer()) with
// its resolved name (getName()).
// NOTE(review): the statement executed when getName() fails is not visible in
// this extract; presumably the error code is returned to the caller.
172 ErrorOr<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
173 ErrorOr<StringRef> NameOrErr = getName();
174 if (std::error_code EC = NameOrErr.getError())
176 StringRef Name = NameOrErr.get();
177 return MemoryBufferRef(getBuffer(), Name);
// Obtains this member's MemoryBufferRef and hands it, together with Context,
// to createBinary(), returning whatever createBinary() produces.
// NOTE(review): the statement executed when getMemoryBufferRef() fails is not
// visible in this extract; presumably the error code is returned.
180 ErrorOr<std::unique_ptr<Binary>>
181 Archive::Child::getAsBinary(LLVMContext *Context) const {
182 ErrorOr<MemoryBufferRef> BuffOrErr = getMemoryBufferRef();
183 if (std::error_code EC = BuffOrErr.getError())
186 return createBinary(BuffOrErr.get(), Context);
// Factory: heap-allocates an Archive over Source, letting the constructor
// report its status through EC, and returns ownership of the new object.
// NOTE(review): the declaration of EC and the check made on it after
// construction are not visible in this extract.
189 ErrorOr<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
191 std::unique_ptr<Archive> Ret(new Archive(Source, EC));
194 return std::move(Ret);
// Constructs an Archive over Source and reports the outcome through ec.
//
// The buffer must begin with ThinMagic or Magic; otherwise ec is set to
// object_error::invalid_file_type. The constructor then examines the raw
// names of the leading members, following the patterns described in the long
// comment below, and sets ec to object_error::success or
// object_error::parse_failed along the way. SymbolTable starts out equal to
// child_end().
//
// NOTE(review): a large share of this function's statements (branch bodies,
// returns, iterator increments, and the assignments that record the detected
// format and special members) are not visible in this extract, so the control
// flow between the visible lines cannot be confirmed from here.
197 Archive::Archive(MemoryBufferRef Source, std::error_code &ec)
198 : Binary(Binary::ID_Archive, Source), SymbolTable(child_end()) {
199 StringRef Buffer = Data.getBuffer();
200 // Check for sufficient magic.
201 if (Buffer.startswith(ThinMagic)) {
203 } else if (Buffer.startswith(Magic)) {
206 ec = object_error::invalid_file_type;
210 // Get the special members.
211 child_iterator i = child_begin(false);
212 child_iterator e = child_end();
215 ec = object_error::success;
219 StringRef Name = i->getRawName();
221 // Below is the pattern that is used to figure out the archive format
222 // GNU archive format
223 // First member : / (may exist, if it exists, points to the symbol table)
224 // Second member : // (may exist, if it exists, points to the string table)
225 // Note : The string table is used if the filename exceeds 15 characters
226 // BSD archive format
227 // First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table)
228 // There is no string table; if the filename exceeds 15 characters or has an
229 // embedded space, the filename is #1/<size>, where <size> is the length
230 // of the filename that needs to be read after the archive header
231 // COFF archive format
233 // Second member : / (provides a directory of symbols)
234 // Third member : // (may exist, if it exists, contains the string table)
235 // Note: Microsoft PE/COFF Spec 8.3 says that the third member is present
236 // even if the string table is empty. However, lib.exe does not in fact
237 // seem to create the third member if there's no member whose filename
238 // exceeds 15 characters. So the third member is optional.
240 if (Name == "__.SYMDEF") {
245 ec = object_error::success;
249 if (Name.startswith("#1/")) {
251 // We know this is BSD, so getName will work since there is no string table.
252 ErrorOr<StringRef> NameOrErr = i->getName();
253 ec = NameOrErr.getError();
256 Name = NameOrErr.get();
257 if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") {
270 ec = object_error::parse_failed;
273 Name = i->getRawName();
281 ec = object_error::success;
285 if (Name[0] != '/') {
288 ec = object_error::success;
293 ec = object_error::parse_failed;
303 ec = object_error::success;
307 Name = i->getRawName();
315 ec = object_error::success;
// Returns an iterator to the first member of the archive, which starts
// immediately after the magic string.
// A buffer of exactly 8 bytes (the length of the magic string) holds no
// members and is treated as an empty archive.
// NOTE(review): the empty-archive return, the use of SkipInternal and the
// final return are not visible in this extract.
318 Archive::child_iterator Archive::child_begin(bool SkipInternal) const {
319 if (Data.getBufferSize() == 8) // empty archive.
325 const char *Loc = Data.getBufferStart() + strlen(Magic);
// Returns the past-the-end iterator: a Child of this archive built from a
// null start pointer.
330 Archive::child_iterator Archive::child_end() const {
331 return Child(this, nullptr);
// Returns the symbol's name, read from the parent's symbol table member at
// byte offset StringIndex from the start of that member's buffer.
// NOTE(review): the StringRef is formed from a bare character pointer, so the
// name presumably relies on a terminating NUL inside the buffer; no bounds
// check on StringIndex is visible here.
334 StringRef Archive::Symbol::getName() const {
335 return Parent->SymbolTable->getBuffer().begin() + StringIndex;
// Looks up the archive member that defines this symbol and returns an iterator
// to it.
//
// The member's byte offset within the archive is read from the parent's
// symbol table; how it is found depends on the archive kind:
//   * K_GNU: from a table of big-endian 32-bit values that starts 4 bytes into
//     the symbol table;
//   * K_BSD: from the second 32-bit little-endian word of the ranlib struct
//     selected by SymbolIndex;
//   * otherwise: SymbolIndex selects a 16-bit little-endian entry in an index
//     table, and that entry selects a 32-bit little-endian member offset.
//     Both indices are range-checked and object_error::parse_failed is
//     returned when either is out of range.
//
// NOTE(review): the declaration of Offset, the tails of several expressions,
// the 1-based index adjustment and the final return are not visible in this
// extract.
// NOTE(review): no range check on SymbolIndex is visible for the K_GNU and
// K_BSD branches, and Offset is not visibly validated against the archive
// size before Loc is formed.
338 ErrorOr<Archive::child_iterator> Archive::Symbol::getMember() const {
339 const char *Buf = Parent->SymbolTable->getBuffer().begin();
340 const char *Offsets = Buf + 4;
342 if (Parent->kind() == K_GNU) {
343 Offset = *(reinterpret_cast<const support::ubig32_t*>(Offsets)
345 } else if (Parent->kind() == K_BSD) {
346 // The SymbolIndex is an index into the ranlib structs that start at
347 // Offsets (the first uint32_t is the number of bytes of the ranlib
348 // structs). The ranlib structs are a pair of uint32_t's the first
349 // being a string table offset and the second being the offset into
350 // the archive of the member that defines the symbol. Which is what
352 Offset = *(reinterpret_cast<const support::ulittle32_t *>(Offsets) +
353 (SymbolIndex * 2) + 1);
// The table begins with a 32-bit little-endian member count.
355 uint32_t MemberCount = *reinterpret_cast<const support::ulittle32_t*>(Buf);
// Step over the member count and the MemberCount 32-bit member offsets.
358 Buf += sizeof(support::ulittle32_t)
359 + (MemberCount * sizeof(support::ulittle32_t));
361 uint32_t SymbolCount = *reinterpret_cast<const support::ulittle32_t*>(Buf);
363 if (SymbolIndex >= SymbolCount)
364 return object_error::parse_failed;
366 // Skip SymbolCount to get to the indices table.
367 const char *Indices = Buf + sizeof(support::ulittle32_t);
369 // Get the index of the offset in the file member offset table for this
371 uint16_t OffsetIndex =
372 *(reinterpret_cast<const support::ulittle16_t*>(Indices)
374 // Subtract 1 since OffsetIndex is 1 based.
377 if (OffsetIndex >= MemberCount)
378 return object_error::parse_failed;
380 Offset = *(reinterpret_cast<const support::ulittle32_t*>(Offsets)
// Offset is measured from the start of the archive's data.
384 const char *Loc = Parent->getData().begin() + Offset;
385 child_iterator Iter(Child(Parent, Loc));
// Produces the Symbol that follows this one by updating the working copy t.
//
// For K_BSD archives, t.StringIndex is rebased using the string table offsets
// recorded in the current and next ranlib structs, provided a next struct
// exists. For other kinds, the new string index is one past the next NUL byte
// found at or after t.StringIndex in the symbol table buffer.
//
// NOTE(review): the declaration of t, the increment of t.SymbolIndex, the
// left-hand side of the final assignment and the return statement are not
// visible in this extract.
389 Archive::Symbol Archive::Symbol::getNext() const {
391 if (Parent->kind() == K_BSD) {
392 // t.StringIndex is an offset from the start of the __.SYMDEF or
393 // "__.SYMDEF SORTED" member into the string table for the ranlib
394 // struct indexed by t.SymbolIndex. To change t.StringIndex to the
395 // offset in the string table for t.SymbolIndex+1 we subtract
396 // its offset from the start of the string table for t.SymbolIndex
397 // and add the offset of the string table for t.SymbolIndex+1.
399 // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
400 // which is the number of bytes of ranlib structs that follow. The ranlib
401 // structs are a pair of uint32_t's the first being a string table offset
402 // and the second being the offset into the archive of the member that
403 // defines the symbol. After that the next uint32_t is the byte count of
404 // the string table followed by the string table.
405 const char *Buf = Parent->SymbolTable->getBuffer().begin();
406 uint32_t RanlibCount = 0;
// The leading word is a byte count; each ranlib struct is two uint32_t's.
407 RanlibCount = (*reinterpret_cast<const support::ulittle32_t *>(Buf)) /
408 (sizeof(uint32_t) * 2);
409 // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount)
410 // don't change the t.StringIndex as we don't want to reference a ranlib
412 if (t.SymbolIndex + 1 < RanlibCount) {
413 const char *Ranlibs = Buf + 4;
414 uint32_t CurRanStrx = 0;
415 uint32_t NextRanStrx = 0;
416 CurRanStrx = *(reinterpret_cast<const support::ulittle32_t *>(Ranlibs) +
417 (t.SymbolIndex * 2));
418 NextRanStrx = *(reinterpret_cast<const support::ulittle32_t *>(Ranlibs) +
419 ((t.SymbolIndex + 1) * 2));
420 t.StringIndex -= CurRanStrx;
421 t.StringIndex += NextRanStrx;
424 // Go to one past next null.
426 Parent->SymbolTable->getBuffer().find('\0', t.StringIndex) + 1;
// Returns an iterator to the first symbol in the archive's symbol table.
//
// Without a symbol table the result is Symbol(this, 0, 0), the same value
// symbol_end() returns in that case. Otherwise the function walks past the
// kind-specific tables at the front of the symbol table member and records,
// as the first symbol's string index, the distance from the start of the
// member's buffer to where buf ends up.
//
// NOTE(review): a few statements are not visible in this extract (notably the
// use of ran_strx in the K_BSD branch and the line introducing the final
// branch), so the exact start position for those kinds cannot be confirmed
// from here.
432 Archive::symbol_iterator Archive::symbol_begin() const {
433 if (!hasSymbolTable())
434 return symbol_iterator(Symbol(this, 0, 0));
436 const char *buf = SymbolTable->getBuffer().begin();
437 if (kind() == K_GNU) {
438 uint32_t symbol_count = 0;
// Big-endian symbol count, followed by one 32-bit entry per symbol.
439 symbol_count = *reinterpret_cast<const support::ubig32_t*>(buf);
440 buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
441 } else if (kind() == K_BSD) {
442 // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
443 // which is the number of bytes of ranlib structs that follow. The ranlib
444 // structs are a pair of uint32_t's the first being a string table offset
445 // and the second being the offset into the archive of the member that
446 // defines the symbol. After that the next uint32_t is the byte count of
447 // the string table followed by the string table.
448 uint32_t ranlib_count = 0;
449 ranlib_count = (*reinterpret_cast<const support::ulittle32_t *>(buf)) /
450 (sizeof(uint32_t) * 2);
451 const char *ranlibs = buf + 4;
452 uint32_t ran_strx = 0;
// String table offset stored in the first ranlib struct.
453 ran_strx = *(reinterpret_cast<const support::ulittle32_t *>(ranlibs));
454 buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t))));
455 // Skip the byte count of the string table.
456 buf += sizeof(uint32_t);
459 uint32_t member_count = 0;
460 uint32_t symbol_count = 0;
461 member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
462 buf += 4 + (member_count * 4); // Skip offsets.
463 symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
464 buf += 4 + (symbol_count * 2); // Skip indices.
466 uint32_t string_start_offset = buf - SymbolTable->getBuffer().begin();
467 return symbol_iterator(Symbol(this, 0, string_start_offset));
// Returns the past-the-end symbol iterator: a Symbol whose symbol index equals
// the number of symbols and whose string index is 0.
//
// Without a symbol table the result is Symbol(this, 0, 0). Otherwise the
// symbol count is read according to the archive kind:
//   * K_GNU: the leading big-endian 32-bit word;
//   * K_BSD: the leading little-endian byte count divided by the size of one
//     ranlib struct (two uint32_t's);
//   * otherwise: the little-endian 32-bit word that follows the member count
//     and the member offsets.
// NOTE(review): the line introducing the final branch is not visible in this
// extract.
470 Archive::symbol_iterator Archive::symbol_end() const {
471 if (!hasSymbolTable())
472 return symbol_iterator(Symbol(this, 0, 0));
474 const char *buf = SymbolTable->getBuffer().begin();
475 uint32_t symbol_count = 0;
476 if (kind() == K_GNU) {
477 symbol_count = *reinterpret_cast<const support::ubig32_t*>(buf);
478 } else if (kind() == K_BSD) {
479 symbol_count = (*reinterpret_cast<const support::ulittle32_t *>(buf)) /
480 (sizeof(uint32_t) * 2);
482 uint32_t member_count = 0;
483 member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
484 buf += 4 + (member_count * 4); // Skip offsets.
485 symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
487 return symbol_iterator(
488 Symbol(this, symbol_count, 0));
// Linearly scans the symbol table for a symbol whose name equals `name` and
// returns an iterator to the member that defines the first match.
// NOTE(review): the statement executed when getMember() fails and the return
// used when no symbol matches are not visible in this extract; per the FIXME
// below the lookup error is swallowed rather than reported.
491 Archive::child_iterator Archive::findSym(StringRef name) const {
492 Archive::symbol_iterator bs = symbol_begin();
493 Archive::symbol_iterator es = symbol_end();
495 for (; bs != es; ++bs) {
496 StringRef SymName = bs->getName();
497 if (SymName == name) {
498 ErrorOr<Archive::child_iterator> ResultOrErr = bs->getMember();
499 // FIXME: Should we really eat the error?
500 if (ResultOrErr.getError())
502 return ResultOrErr.get();
// True when SymbolTable refers to an actual member, i.e. it differs from the
// child_end() value it is initialized to in the constructor.
508 bool Archive::hasSymbolTable() const {
509 return SymbolTable != child_end();