1 //=-- InstrProfReader.cpp - Instrumented profiling reader -------------------=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains support for reading profiling data for clang's
11 // instrumentation based PGO and coverage.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ProfileData/InstrProfReader.h"
16 #include "llvm/ADT/STLExtras.h"
/// Loads the profile at \p Path through MemoryBuffer::getFileOrSTDIN and
/// yields the resulting buffer. The load result is checked for an error
/// before the buffer is moved out to the caller.
21 static ErrorOr<std::unique_ptr<MemoryBuffer>>
22 setupMemoryBuffer(std::string Path) {
23 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
24 MemoryBuffer::getFileOrSTDIN(Path);
// Error path: inspect the ErrorOr before touching its value.
25 if (std::error_code EC = BufferOrErr.getError())
// Success path: transfer ownership of the loaded buffer.
27 return std::move(BufferOrErr.get());
/// Shared initialization step for freshly constructed readers: forwards to
/// \p Reader's readHeader() and returns its error code unchanged.
30 static std::error_code initializeReader(InstrProfReader &Reader) {
31 return Reader.readHeader();
/// Path-based factory: loads \p Path with setupMemoryBuffer and then
/// delegates to the MemoryBuffer overload of InstrProfReader::create.
34 ErrorOr<std::unique_ptr<InstrProfReader>>
35 InstrProfReader::create(std::string Path) {
36 // Set up the buffer to read.
37 auto BufferOrError = setupMemoryBuffer(Path);
38 if (std::error_code EC = BufferOrError.getError())
// The buffer is moved into the overload below, which takes ownership.
40 return InstrProfReader::create(std::move(BufferOrError.get()));
/// Buffer-based factory. Probes \p Buffer with each reader's hasFormat in the
/// order indexed, raw 64-bit, raw 32-bit, text, and constructs the first
/// reader that accepts it. The new reader is then passed through
/// initializeReader before being returned.
///
/// Errors visible in this function: instrprof_error::too_large for buffers
/// whose size exceeds std::numeric_limits<unsigned>::max(), and
/// instrprof_error::unrecognized_format at the end of the probing chain.
43 ErrorOr<std::unique_ptr<InstrProfReader>>
44 InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
45 // Sanity check the buffer.
46 if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max())
47 return instrprof_error::too_large;
49 std::unique_ptr<InstrProfReader> Result;
// Each branch moves Buffer into the reader it constructs, so at most one
// branch may run.
51 if (IndexedInstrProfReader::hasFormat(*Buffer))
52 Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
53 else if (RawInstrProfReader64::hasFormat(*Buffer))
54 Result.reset(new RawInstrProfReader64(std::move(Buffer)));
55 else if (RawInstrProfReader32::hasFormat(*Buffer))
56 Result.reset(new RawInstrProfReader32(std::move(Buffer)));
57 else if (TextInstrProfReader::hasFormat(*Buffer))
58 Result.reset(new TextInstrProfReader(std::move(Buffer)));
60 return instrprof_error::unrecognized_format;
62 // Initialize the reader and return the result.
63 if (std::error_code EC = initializeReader(*Result))
66 return std::move(Result);
/// Path-based factory for indexed profiles: loads \p Path with
/// setupMemoryBuffer and then delegates to the MemoryBuffer overload of
/// IndexedInstrProfReader::create.
69 ErrorOr<std::unique_ptr<IndexedInstrProfReader>>
70 IndexedInstrProfReader::create(std::string Path) {
71 // Set up the buffer to read.
72 auto BufferOrError = setupMemoryBuffer(Path);
73 if (std::error_code EC = BufferOrError.getError())
// Ownership of the buffer passes to the overload below.
75 return IndexedInstrProfReader::create(std::move(BufferOrError.get()));
/// Buffer-based factory for indexed profiles. Unlike InstrProfReader::create
/// it accepts only the indexed format: a buffer that fails
/// IndexedInstrProfReader::hasFormat is rejected with
/// instrprof_error::bad_magic. Oversized buffers (larger than
/// std::numeric_limits<unsigned>::max()) yield instrprof_error::too_large.
79 ErrorOr<std::unique_ptr<IndexedInstrProfReader>>
80 IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
81 // Sanity check the buffer.
82 if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max())
83 return instrprof_error::too_large;
// Check the magic before constructing the reader.
86 if (!IndexedInstrProfReader::hasFormat(*Buffer))
87 return instrprof_error::bad_magic;
88 auto Result = llvm::make_unique<IndexedInstrProfReader>(std::move(Buffer));
90 // Initialize the reader and return the result.
91 if (std::error_code EC = initializeReader(*Result))
94 return std::move(Result);
/// Advances the iterator by asking the reader to fill Record with the next
/// record. When readNextRecord reports any error code, the iterator is
/// overwritten with a default-constructed InstrProfIterator (presumably the
/// end sentinel -- confirm against the iterator's comparison operators).
97 void InstrProfIterator::Increment() {
98 if (Reader->readNextRecord(Record))
99 *this = InstrProfIterator();
/// Heuristic check for the text profile format: examines at most
/// sizeof(uint64_t) leading bytes of \p Buffer and tests each one with
/// isprint/isspace.
102 bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) {
103 // Verify that this really looks like plain ASCII text by checking a
104 // 'reasonable' number of characters (up to profile magic size).
105 size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t));
// NOTE(review): the StringRef is built from a bare pointer, so its length is
// not taken from Buffer.getBufferSize(); only begin() is used below, offset
// by `count`. Confirm this is intended.
106 StringRef buffer = Buffer.getBufferStart();
// NOTE(review): the lambda hands a plain `char` to ::isprint / ::isspace.
// The C library requires the argument to be representable as unsigned char
// (or EOF); bytes >= 0x80 are negative where char is signed. Consider
// casting to unsigned char.
108 std::all_of(buffer.begin(), buffer.begin() + count,
109 [](char c) { return ::isprint(c) || ::isspace(c); });
/// The only header work visible for the text reader is replacing Symtab with
/// a freshly constructed, empty InstrProfSymtab.
112 std::error_code TextInstrProfReader::readHeader() {
113 Symtab.reset(new InstrProfSymtab());
/// Parses the optional value-profile section of the current text record and
/// stores it in \p Record. The section begins with the number of value kinds;
/// each kind then gives its kind id and number of sites; each site gives its
/// number of values followed by that many "value:count" lines.
118 TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
// Local parsing helpers, each of which returns from this function on failure:
//  - CHECK_LINE_END: instrprof_error::truncated when the line iterator is
//    exhausted.
//  - READ_NUM: instrprof_error::malformed when the field is not a base-10
//    integer.
//  - VP_READ_ADVANCE: applies both to the current line.
120 #define CHECK_LINE_END(Line) \
121 if (Line.is_at_end()) \
122 return error(instrprof_error::truncated);
123 #define READ_NUM(Str, Dst) \
124 if ((Str).getAsInteger(10, (Dst))) \
125 return error(instrprof_error::malformed);
126 #define VP_READ_ADVANCE(Val) \
127 CHECK_LINE_END(Line); \
129 READ_NUM((*Line), (Val)); \
132 if (Line.is_at_end())
135 uint32_t NumValueKinds;
// A line that does not parse as a base-10 integer is taken to mean that this
// record has no value-profile section.
136 if (Line->getAsInteger(10, NumValueKinds)) {
137 // No value profile data
// The kind count must lie in [1, IPVK_Last + 1].
140 if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1)
141 return error(instrprof_error::malformed);
// NOTE(review): ValueKind is parsed and range-checked below, but the loop
// index VK is what gets passed to reserveSites/addValueData and compared with
// IPVK_IndirectCallTarget. Confirm that VK rather than ValueKind is intended.
144 for (uint32_t VK = 0; VK < NumValueKinds; VK++) {
145 VP_READ_ADVANCE(ValueKind);
146 if (ValueKind > IPVK_Last)
147 return error(instrprof_error::malformed);
148 VP_READ_ADVANCE(NumValueSites);
152 Record.reserveSites(VK, NumValueSites);
153 for (uint32_t S = 0; S < NumValueSites; S++) {
154 VP_READ_ADVANCE(NumValueData);
// Values for one site are collected here and handed to the record in one call.
156 std::vector<InstrProfValueData> CurrentValues;
157 for (uint32_t V = 0; V < NumValueData; V++) {
158 CHECK_LINE_END(Line);
// Each value line has the form "<value>:<taken count>".
159 std::pair<StringRef, StringRef> VD = Line->split(':');
160 uint64_t TakenCount, Value;
// For IPVK_IndirectCallTarget the first field is treated as a function name:
// it is added to Symtab and Value becomes the hash of that name. READ_NUM
// parses the first field as a base-10 integer instead.
161 if (VK == IPVK_IndirectCallTarget) {
162 Symtab->addFuncName(VD.first);
163 Value = IndexedInstrProf::ComputeHash(VD.first);
165 READ_NUM(VD.first, Value);
167 READ_NUM(VD.second, TakenCount);
168 CurrentValues.push_back({Value, TakenCount});
// No value map is supplied (nullptr).
171 Record.addValueData(VK, S, CurrentValues.data(), NumValueData, nullptr);
// The helper macros are local to this function.
176 #undef CHECK_LINE_END
178 #undef VP_READ_ADVANCE
/// Reads the next record of a text profile into \p Record: function name,
/// function hash, number of counters, the counters themselves, and then any
/// value profile data.
///
/// Errors visible in this function: instrprof_error::eof when input ends
/// before a name is found, instrprof_error::truncated when it ends inside a
/// record, and instrprof_error::malformed for a field that fails to parse or
/// a counter count of zero.
181 std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) {
182 // Skip empty lines and comments.
183 while (!Line.is_at_end() && (Line->empty() || Line->startswith("#")))
185 // If we hit EOF while looking for a name, we're done.
186 if (Line.is_at_end()) {
// The symbol table is finalized here as well as after every record.
187 Symtab->finalizeSymtab();
188 return error(instrprof_error::eof);
191 // Read the function name.
192 Record.Name = *Line++;
193 Symtab->addFuncName(Record.Name);
195 // Read the function hash.
196 if (Line.is_at_end())
197 return error(instrprof_error::truncated);
// The hash is parsed with radix 0, whereas the counter fields below use
// radix 10.
198 if ((Line++)->getAsInteger(0, Record.Hash))
199 return error(instrprof_error::malformed);
201 // Read the number of counters.
202 uint64_t NumCounters;
203 if (Line.is_at_end())
204 return error(instrprof_error::truncated);
205 if ((Line++)->getAsInteger(10, NumCounters))
206 return error(instrprof_error::malformed);
// A record with no counters is rejected.
207 if (NumCounters == 0)
208 return error(instrprof_error::malformed);
210 // Read each counter and fill our internal storage with the values.
211 Record.Counts.clear();
212 Record.Counts.reserve(NumCounters);
213 for (uint64_t I = 0; I < NumCounters; ++I) {
214 if (Line.is_at_end())
215 return error(instrprof_error::truncated);
217 if ((Line++)->getAsInteger(10, Count))
218 return error(instrprof_error::malformed);
219 Record.Counts.push_back(Count);
222 // Check if value profile data exists and read it if so.
223 if (std::error_code EC = readValueProfileData(Record))
226 // This is needed to avoid two pass parsing because llvm-profdata
227 // does dumping while reading.
228 Symtab->finalizeSymtab();
/// Format probe for raw profiles. Buffers smaller than sizeof(uint64_t) are
/// handled by a separate early check. Otherwise the leading 64-bit word is
/// accepted if it equals the raw magic for IntPtrT, either as-is or
/// byte-swapped.
232 template <class IntPtrT>
233 bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {
234 if (DataBuffer.getBufferSize() < sizeof(uint64_t))
// The magic is read directly from the start of the buffer.
237 *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());
238 return RawInstrProf::getMagic<IntPtrT>() == Magic ||
239 sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic;
/// Validates and reads the first raw profile header in DataBuffer. Returns
/// instrprof_error::bad_magic if hasFormat rejects the buffer and
/// instrprof_error::bad_header if the buffer is smaller than
/// RawInstrProf::Header. Otherwise it records whether byte swapping is needed
/// and delegates to readHeader(const RawInstrProf::Header &).
242 template <class IntPtrT>
243 std::error_code RawInstrProfReader<IntPtrT>::readHeader() {
244 if (!hasFormat(*DataBuffer))
245 return error(instrprof_error::bad_magic);
246 if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header))
247 return error(instrprof_error::bad_header);
248 auto *Header = reinterpret_cast<const RawInstrProf::Header *>(
249 DataBuffer->getBufferStart());
// hasFormat has already accepted the magic in one of the two byte orders, so
// a mismatch with the native magic means the data is byte-swapped.
250 ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>();
251 return readHeader(*Header);
/// Looks for another raw profile starting at or after \p CurrentPos and, if
/// one is found, reads its header via readHeader(const RawInstrProf::Header &).
///
/// Errors visible in this function: instrprof_error::eof when only zero
/// padding remains, instrprof_error::malformed when the remaining bytes
/// cannot hold a header or the position is not uint64_t-aligned, and
/// instrprof_error::bad_magic when the magic differs from the expected value.
254 template <class IntPtrT>
256 RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
257 const char *End = DataBuffer->getBufferEnd();
258 // Skip zero padding between profiles.
259 while (CurrentPos != End && *CurrentPos == 0)
261 // If there's nothing left, we're done.
262 if (CurrentPos == End)
263 return instrprof_error::eof;
264 // If there isn't enough space for another header, this is probably just
265 // garbage at the end of the file.
266 if (CurrentPos + sizeof(RawInstrProf::Header) > End)
267 return instrprof_error::malformed;
268 // The writer ensures each profile is padded to start at an aligned address.
269 if (reinterpret_cast<size_t>(CurrentPos) % alignOf<uint64_t>())
270 return instrprof_error::malformed;
271 // The magic should have the same byte order as in the previous header.
272 uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos);
// The stored magic is compared against swap() of the native magic.
273 if (Magic != swap(RawInstrProf::getMagic<IntPtrT>()))
274 return instrprof_error::bad_magic;
276 // There's another profile to read, so we need to process the header.
277 auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos);
278 return readHeader(*Header);
/// Populates \p Symtab from the current profile's data records in
/// [Data, DataEnd). Each record contributes its function name and a mapping
/// from its function pointer to the hash of that name. The table is finalized
/// after the loop.
281 template <class IntPtrT>
282 void RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) {
283 for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) {
// NameSize and FunctionPointer are passed through swap() before use.
284 StringRef FunctionName(getName(I->NamePtr), swap(I->NameSize));
285 Symtab.addFuncName(FunctionName);
286 const IntPtrT FPtr = swap(I->FunctionPointer);
289 Symtab.mapAddress(FPtr, IndexedInstrProf::ComputeHash(FunctionName));
291 Symtab.finalizeSymtab();
/// Interprets one raw profile header and positions the reader on that
/// profile. It checks the version, reads the section sizes and deltas through
/// swap(), derives the offset of each section from the start of the header,
/// verifies that the whole profile fits in DataBuffer, sets the section
/// pointers, and builds a new symbol table for the profile.
///
/// Errors visible in this function: instrprof_error::unsupported_version and
/// instrprof_error::bad_header.
294 template <class IntPtrT>
296 RawInstrProfReader<IntPtrT>::readHeader(const RawInstrProf::Header &Header) {
297 if (swap(Header.Version) != RawInstrProf::Version)
298 return error(instrprof_error::unsupported_version);
300 CountersDelta = swap(Header.CountersDelta);
301 NamesDelta = swap(Header.NamesDelta);
302 auto DataSize = swap(Header.DataSize);
303 auto CountersSize = swap(Header.CountersSize);
304 auto NamesSize = swap(Header.NamesSize);
305 auto ValueDataSize = swap(Header.ValueDataSize);
306 ValueKindLast = swap(Header.ValueKindLast);
// DataSize counts ProfileData records and CountersSize counts uint64_t
// counters. NamesSize and ValueDataSize are used as byte counts.
308 auto DataSizeInBytes = DataSize * sizeof(RawInstrProf::ProfileData<IntPtrT>);
309 auto PaddingSize = getNumPaddingBytes(NamesSize);
// Section layout: header, data records, counters, names plus padding, value
// data.
// NOTE(review): these sums and products use header-supplied sizes with no
// overflow check, so the bounds test below could be defeated by wrap-around
// on a corrupt file. Confirm whether the inputs are trusted.
311 ptrdiff_t DataOffset = sizeof(RawInstrProf::Header);
312 ptrdiff_t CountersOffset = DataOffset + DataSizeInBytes;
313 ptrdiff_t NamesOffset = CountersOffset + sizeof(uint64_t) * CountersSize;
314 ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize;
315 size_t ProfileSize = ValueDataOffset + ValueDataSize;
// The whole profile must lie inside the buffer.
317 auto *Start = reinterpret_cast<const char *>(&Header);
318 if (Start + ProfileSize > DataBuffer->getBufferEnd())
319 return error(instrprof_error::bad_header);
321 Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(
323 DataEnd = Data + DataSize;
324 CountersStart = reinterpret_cast<const uint64_t *>(Start + CountersOffset);
325 NamesStart = Start + NamesOffset;
326 ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset);
327 ProfileEnd = Start + ProfileSize;
// The symbol table is built into a local first and installed only once it is
// complete.
329 std::unique_ptr<InstrProfSymtab> NewSymtab = make_unique<InstrProfSymtab>();
330 createSymtab(*NewSymtab.get());
331 Symtab = std::move(NewSymtab);
/// Sets Record.Name from the current data record's NamePtr and NameSize.
/// Returns instrprof_error::malformed if the resulting string does not lie
/// entirely between NamesStart and ValueDataStart.
335 template <class IntPtrT>
336 std::error_code RawInstrProfReader<IntPtrT>::readName(InstrProfRecord &Record) {
337 Record.Name = StringRef(getName(Data->NamePtr), swap(Data->NameSize));
// Bounds check: the name must start at or after NamesStart and end at or
// before ValueDataStart.
338 if (Record.Name.data() < NamesStart ||
339 Record.Name.data() + Record.Name.size() >
340 reinterpret_cast<const char *>(ValueDataStart))
341 return error(instrprof_error::malformed);
/// Copies the current data record's function hash into Record.Hash, passing
/// it through swap() first.
345 template <class IntPtrT>
346 std::error_code RawInstrProfReader<IntPtrT>::readFuncHash(
347 InstrProfRecord &Record) {
348 Record.Hash = swap(Data->FuncHash);
/// Fills Record.Counts with the counters belonging to the current data
/// record. Returns instrprof_error::malformed if the record has zero counters
/// or if its counter range is not contained in [CountersStart, NamesStart).
352 template <class IntPtrT>
353 std::error_code RawInstrProfReader<IntPtrT>::readRawCounts(
354 InstrProfRecord &Record) {
355 uint32_t NumCounters = swap(Data->NumCounters);
// CounterPtr is taken as stored; getCounter() turns it into a pointer.
356 IntPtrT CounterPtr = Data->CounterPtr;
357 if (NumCounters == 0)
358 return error(instrprof_error::malformed);
360 auto RawCounts = makeArrayRef(getCounter(CounterPtr), NumCounters);
// NamesStart is used as the upper bound of the counter range.
361 auto *NamesStartAsCounter = reinterpret_cast<const uint64_t *>(NamesStart);
364 if (RawCounts.data() < CountersStart ||
365 RawCounts.data() + RawCounts.size() > NamesStartAsCounter)
366 return error(instrprof_error::malformed);
// Byte-swapped profiles need each counter converted individually. The plain
// assignment below copies the counters without conversion.
368 if (ShouldSwapBytes) {
369 Record.Counts.clear();
370 Record.Counts.reserve(RawCounts.size());
371 for (uint64_t Count : RawCounts)
372 Record.Counts.push_back(swap(Count));
374 Record.Counts = RawCounts;
/// Reads the value profile data for the current data record into \p Record.
/// Any previous value data in the record is cleared and CurValueDataSize is
/// reset first. The data is parsed from [ValueDataStart, ProfileEnd) and a
/// parse error is returned to the caller unchanged.
379 template <class IntPtrT>
381 RawInstrProfReader<IntPtrT>::readValueProfilingData(InstrProfRecord &Record) {
383 Record.clearValueData();
384 CurValueDataSize = 0;
385 // Need to match the logic in value profile dumper code in compiler-rt:
// Counts the value kinds that have at least one site in this record.
386 uint32_t NumValueKinds = 0;
387 for (uint32_t I = 0; I < IPVK_Last + 1; I++)
388 NumValueKinds += (Data->NumValueSites[I] != 0);
393 ErrorOr<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
394 ValueProfData::getValueProfData(ValueDataStart,
395 (const unsigned char *)ProfileEnd,
396 getDataEndianness());
398 if (VDataPtrOrErr.getError())
399 return VDataPtrOrErr.getError();
// The symbol table's address-to-hash map is passed to deserializeTo.
401 VDataPtrOrErr.get()->deserializeTo(Record, &Symtab->getAddrHashMap());
// Record the size of the value data just read.
402 CurValueDataSize = VDataPtrOrErr.get()->getSize();
/// Reads the next record of a raw profile into \p Record by running the
/// per-field readers in order: name, function hash, raw counts, value
/// profile data. Each step is checked for an error code.
406 template <class IntPtrT>
408 RawInstrProfReader<IntPtrT>::readNextRecord(InstrProfRecord &Record) {
// Look for a following profile starting at the end of the current one.
410 if (std::error_code EC = readNextHeader(ProfileEnd))
413 // Read name and set it in Record.
414 if (std::error_code EC = readName(Record))
417 // Read FuncHash and set it in Record.
418 if (std::error_code EC = readFuncHash(Record))
421 // Read raw counts and set Record.
422 if (std::error_code EC = readRawCounts(Record))
425 // Read value data and set Record.
426 if (std::error_code EC = readValueProfilingData(Record))
// Explicit instantiations of the raw reader for IntPtrT = uint32_t and
// IntPtrT = uint64_t, so the member definitions in this file are emitted for
// both.
435 template class RawInstrProfReader<uint32_t>;
436 template class RawInstrProfReader<uint64_t>;
/// Hashes key \p K with the trait's configured HashType by forwarding to
/// IndexedInstrProf::ComputeHash.
439 InstrProfLookupTrait::hash_value_type
440 InstrProfLookupTrait::ComputeHash(StringRef K) {
441 return IndexedInstrProf::ComputeHash(HashType, K);
// File-level shorthands for the lookup trait's nested types.
444 typedef InstrProfLookupTrait::data_type data_type;
445 typedef InstrProfLookupTrait::offset_type offset_type;
/// Parses value profile data from [\p D, \p End) and attaches it to the most
/// recently added entry of DataBuffer. On success \p D is advanced past the
/// parsed data by its TotalSize.
447 bool InstrProfLookupTrait::readValueProfilingData(
448 const unsigned char *&D, const unsigned char *const End) {
449 ErrorOr<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
450 ValueProfData::getValueProfData(D, End, ValueProfDataEndianness);
452 if (VDataPtrOrErr.getError())
// No value map is supplied (nullptr).
455 VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr);
456 D += VDataPtrOrErr.get()->TotalSize;
/// Decodes the on-disk payload stored for key \p K (N bytes starting at
/// \p D) into entries appended to DataBuffer. The visible layout of an entry
/// is: a 64-bit little-endian hash; for FormatVersion != 1 a 64-bit counter
/// count; the counters as 64-bit little-endian values; and, for
/// FormatVersion > 2, value profile data.
461 data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
463 // Check if the data is corrupt. If so, don't try to read it.
// The payload length must be a whole number of 64-bit words.
464 if (N % sizeof(uint64_t))
468 std::vector<uint64_t> CounterBuffer;
470 using namespace support;
471 const unsigned char *End = D + N;
// Strictly more than sizeof(uint64_t) bytes must remain (note the >=): a
// hash with nothing after it is not accepted.
474 if (D + sizeof(uint64_t) >= End)
476 uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
478 // Initialize number of counters for FormatVersion == 1.
// Version 1 stores no explicit count: every word after the hash is a counter.
479 uint64_t CountsSize = N / sizeof(uint64_t) - 1;
480 // If format version is different then read the number of counters.
481 if (FormatVersion != 1) {
482 if (D + sizeof(uint64_t) > End)
484 CountsSize = endian::readNext<uint64_t, little, unaligned>(D);
486 // Read counter values.
// NOTE(review): CountsSize may come straight from the file, and the
// multiplication and pointer addition below are not checked for overflow.
// Confirm whether the input is trusted.
487 if (D + CountsSize * sizeof(uint64_t) > End)
490 CounterBuffer.clear();
491 CounterBuffer.reserve(CountsSize);
492 for (uint64_t J = 0; J < CountsSize; ++J)
493 CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
// CounterBuffer is moved into the new entry, so it is cleared and refilled
// before each use above.
495 DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer));
497 // Read value profiling data.
498 if (FormatVersion > 2 && !readValueProfilingData(D, End)) {
/// Looks up the records stored under \p FuncName in the on-disk hash table
/// and exposes them through \p Data.
///
/// Return values visible in this function: instrprof_error::unknown_function
/// when the name is not in the table, instrprof_error::malformed, and
/// instrprof_error::success.
506 template <typename HashTableImpl>
507 std::error_code InstrProfReaderIndex<HashTableImpl>::getRecords(
508 StringRef FuncName, ArrayRef<InstrProfRecord> &Data) {
509 auto Iter = HashTable->find(FuncName);
510 if (Iter == HashTable->end())
511 return instrprof_error::unknown_function;
515 return instrprof_error::malformed;
517 return instrprof_error::success;
/// Sequential-access variant: exposes the records at the current
/// RecordIterator position through \p Data.
///
/// Return values visible in this function: instrprof_error::eof,
/// instrprof_error::malformed, and instrprof_error::success.
520 template <typename HashTableImpl>
521 std::error_code InstrProfReaderIndex<HashTableImpl>::getRecords(
522 ArrayRef<InstrProfRecord> &Data) {
524 return instrprof_error::eof;
// Data becomes a view of the records at the current iterator position.
526 Data = *RecordIterator;
529 return instrprof_error::malformed;
531 return instrprof_error::success;
/// Builds the index over an on-disk hash table. It records the format
/// version, creates the table from \p Buckets, \p Payload and \p Base with an
/// InfoType constructed from the hash type and version, and positions
/// RecordIterator at the table's first data entry.
534 template <typename HashTableImpl>
535 InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex(
536 const unsigned char *Buckets, const unsigned char *const Payload,
537 const unsigned char *const Base, IndexedInstrProf::HashT HashType,
539 FormatVersion = Version;
540 HashTable.reset(HashTableImpl::Create(
541 Buckets, Payload, Base,
542 typename HashTableImpl::InfoType(HashType, Version)));
// Sequential reads start at the first entry.
543 RecordIterator = HashTable->data_begin();
/// Format probe for indexed profiles. Buffers shorter than 8 bytes are
/// handled by a separate early check. Otherwise the leading 64-bit
/// little-endian word must equal IndexedInstrProf::Magic.
546 bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
547 if (DataBuffer.getBufferSize() < 8)
549 using namespace support;
// The magic is read as an aligned little-endian 64-bit value from the buffer
// start.
551 endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart());
552 // Verify that it's magical.
553 return Magic == IndexedInstrProf::Magic;
/// Parses the indexed profile header at the start of DataBuffer and builds
/// the on-disk hash table index from it. All header fields are converted from
/// little-endian.
///
/// Errors visible in this function: instrprof_error::truncated,
/// instrprof_error::bad_magic, instrprof_error::unsupported_version (format
/// newer than IndexedInstrProf::Version), and
/// instrprof_error::unsupported_hash_type.
556 std::error_code IndexedInstrProfReader::readHeader() {
557 const unsigned char *Start =
558 (const unsigned char *)DataBuffer->getBufferStart();
559 const unsigned char *Cur = Start;
// NOTE(review): this requires only 24 bytes, but the code below advances by
// sizeof(IndexedInstrProf::Header) and reads five 64-bit-converted fields
// (Magic, Version, MaxFunctionCount, HashType, HashOffset). Confirm that 24
// is the intended minimum.
560 if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24)
561 return error(instrprof_error::truncated);
563 using namespace support;
565 auto *Header = reinterpret_cast<const IndexedInstrProf::Header *>(Cur);
// Cur now points just past the header and is later passed as the hash
// table's payload pointer.
566 Cur += sizeof(IndexedInstrProf::Header);
568 // Check the magic number.
569 uint64_t Magic = endian::byte_swap<uint64_t, little>(Header->Magic);
570 if (Magic != IndexedInstrProf::Magic)
571 return error(instrprof_error::bad_magic);
// Only versions up to IndexedInstrProf::Version are accepted.
574 uint64_t FormatVersion = endian::byte_swap<uint64_t, little>(Header->Version);
575 if (FormatVersion > IndexedInstrProf::Version)
576 return error(instrprof_error::unsupported_version);
578 // Read the maximal function count.
580 endian::byte_swap<uint64_t, little>(Header->MaxFunctionCount);
582 // Read the hash type and start offset.
583 IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>(
584 endian::byte_swap<uint64_t, little>(Header->HashType));
585 if (HashType > IndexedInstrProf::HashT::Last)
586 return error(instrprof_error::unsupported_hash_type);
588 uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset);
590 // The rest of the file is an on disk hash table.
// Constructor arguments: buckets at Start + HashOffset, payload at Cur,
// base at Start.
591 InstrProfReaderIndexBase *IndexPtr = nullptr;
592 IndexPtr = new InstrProfReaderIndex<OnDiskHashTableImplV3>(
593 Start + HashOffset, Cur, Start, HashType, FormatVersion);
// Index takes ownership of the newly allocated index object.
594 Index.reset(IndexPtr);
/// Returns the reader's symbol table. One exit hands back the table already
/// held in Symtab; the other builds a new table from Index via
/// populateSymtab, stores it in Symtab, and returns that.
598 InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
// Return the table already stored in Symtab.
600 return *Symtab.get();
// Build a new table from the index, then store it.
602 std::unique_ptr<InstrProfSymtab> NewSymtab = make_unique<InstrProfSymtab>();
603 Index->populateSymtab(*NewSymtab.get());
605 Symtab = std::move(NewSymtab);
606 return *Symtab.get();
/// Finds the record for \p FuncName whose hash equals FuncHash. All records
/// stored under the name are fetched from the index and scanned in order; the
/// first one with a matching hash is returned. If none matches, the result is
/// instrprof_error::hash_mismatch.
609 ErrorOr<InstrProfRecord>
610 IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName,
612 ArrayRef<InstrProfRecord> Data;
613 std::error_code EC = Index->getRecords(FuncName, Data);
614 if (EC != instrprof_error::success)
616 // Found it. Look for counters with the right hash.
617 for (unsigned I = 0, E = Data.size(); I < E; ++I) {
618 // Check for a match and fill the vector if there is one.
619 if (Data[I].Hash == FuncHash) {
// NOTE(review): Data is an ArrayRef, whose elements are presumably
// const-qualified, so std::move here would select the copy constructor.
// Confirm whether a copy is intended.
620 return std::move(Data[I]);
623 return error(instrprof_error::hash_mismatch);
/// Convenience wrapper around getInstrProfRecord: looks up the record for
/// \p FuncName / \p FuncHash and copies its counters into \p Counts. The
/// lookup result is checked for an error before the counters are read.
627 IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
628 std::vector<uint64_t> &Counts) {
629 ErrorOr<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash);
630 if (std::error_code EC = Record.getError())
// Copy the counters out of the returned record.
633 Counts = Record.get().Counts;
637 std::error_code IndexedInstrProfReader::readNextRecord(
638 InstrProfRecord &Record) {
639 static unsigned RecordIndex = 0;
641 ArrayRef<InstrProfRecord> Data;
643 std::error_code EC = Index->getRecords(Data);
644 if (EC != instrprof_error::success)
647 Record = Data[RecordIndex++];
648 if (RecordIndex >= Data.size()) {
649 Index->advanceToNextKey();