1 //=-- InstrProfReader.h - Instrumented profiling readers ----------*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains support for reading profiling data for instrumentation
11 // based PGO and coverage.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
16 #define LLVM_PROFILEDATA_INSTRPROFREADER_H
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ProfileData/InstrProf.h"
21 #include "llvm/Support/EndianStream.h"
22 #include "llvm/Support/ErrorOr.h"
23 #include "llvm/Support/LineIterator.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/OnDiskHashTable.h"
26 #include "llvm/Support/raw_ostream.h"
31 class InstrProfReader;
33 /// A file format agnostic iterator over profiling data.
34 class InstrProfIterator : public std::iterator<std::input_iterator_tag,
36 InstrProfReader *Reader;
37 InstrProfRecord Record;
41 InstrProfIterator() : Reader(nullptr) {}
42 InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
44 InstrProfIterator &operator++() { Increment(); return *this; }
45 bool operator==(const InstrProfIterator &RHS) { return Reader == RHS.Reader; }
46 bool operator!=(const InstrProfIterator &RHS) { return Reader != RHS.Reader; }
47 InstrProfRecord &operator*() { return Record; }
48 InstrProfRecord *operator->() { return &Record; }
51 /// Base class and interface for reading profiling data of any known instrprof
52 /// format. Provides an iterator over InstrProfRecords.
53 class InstrProfReader {
54 std::error_code LastError;
57 InstrProfReader() : LastError(instrprof_error::success), Symtab() {}
58 virtual ~InstrProfReader() {}
60 /// Read the header. Required before reading first record.
61 virtual std::error_code readHeader() = 0;
62 /// Read a single record.
63 virtual std::error_code readNextRecord(InstrProfRecord &Record) = 0;
64 /// Iterator over profile data.
65 InstrProfIterator begin() { return InstrProfIterator(this); }
66 InstrProfIterator end() { return InstrProfIterator(); }
68 /// Return the PGO symtab. There are three different readers:
69 /// Raw, Text, and Indexed profile readers. The first two types
70 /// of readers are used only by llvm-profdata tool, while the indexed
71 /// profile reader is also used by llvm-cov tool and the compiler (
72 /// backend or frontend). Since creating PGO symtab can create
73 /// significant runtime and memory overhead (as it touches data
74 /// for the whole program), InstrProfSymtab for the indexed profile
75 /// reader should be created on demand and it is recommended to be
/// only used for dumping purposes with llvm-profdata, not with the
78 virtual InstrProfSymtab &getSymtab() = 0;
81 std::unique_ptr<InstrProfSymtab> Symtab;
82 /// Set the current std::error_code and return same.
83 std::error_code error(std::error_code EC) {
88 /// Clear the current error code and return a successful one.
89 std::error_code success() { return error(instrprof_error::success); }
92 /// Return true if the reader has finished reading the profile data.
93 bool isEOF() { return LastError == instrprof_error::eof; }
94 /// Return true if the reader encountered an error reading profiling data.
95 bool hasError() { return LastError && !isEOF(); }
96 /// Get the current error code.
97 std::error_code getError() { return LastError; }
99 /// Factory method to create an appropriately typed reader for the given
101 static ErrorOr<std::unique_ptr<InstrProfReader>> create(std::string Path);
103 static ErrorOr<std::unique_ptr<InstrProfReader>>
104 create(std::unique_ptr<MemoryBuffer> Buffer);
107 /// Reader for the simple text based instrprof format.
109 /// This format is a simple text format that's suitable for test data. Records
110 /// are separated by one or more blank lines, and record fields are separated by
113 /// Each record consists of a function name, a function hash, a number of
114 /// counters, and then each counter value, in that order.
115 class TextInstrProfReader : public InstrProfReader {
117 /// The profile data file contents.
118 std::unique_ptr<MemoryBuffer> DataBuffer;
119 /// Iterator over the profile data.
122 TextInstrProfReader(const TextInstrProfReader &) = delete;
123 TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
124 std::error_code readValueProfileData(InstrProfRecord &Record);
127 TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
128 : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
130 /// Return true if the given buffer is in text instrprof format.
131 static bool hasFormat(const MemoryBuffer &Buffer);
134 std::error_code readHeader() override;
135 /// Read a single record.
136 std::error_code readNextRecord(InstrProfRecord &Record) override;
138 InstrProfSymtab &getSymtab() override {
139 assert(Symtab.get());
140 return *Symtab.get();
144 /// Reader for the raw instrprof binary format from runtime.
/// This format is a raw memory dump of the instrumentation-based profiling data
147 /// from the runtime. It has no index.
149 /// Templated on the unsigned type whose size matches pointers on the platform
150 /// that wrote the profile.
151 template <class IntPtrT>
152 class RawInstrProfReader : public InstrProfReader {
154 /// The profile data file contents.
155 std::unique_ptr<MemoryBuffer> DataBuffer;
156 bool ShouldSwapBytes;
157 uint64_t CountersDelta;
159 const RawInstrProf::ProfileData<IntPtrT> *Data;
160 const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
161 const uint64_t *CountersStart;
162 const char *NamesStart;
163 const uint8_t *ValueDataStart;
164 const char *ProfileEnd;
165 uint32_t ValueKindLast;
166 uint32_t CurValueDataSize;
168 InstrProfRecord::ValueMapType FunctionPtrToNameMap;
170 RawInstrProfReader(const RawInstrProfReader &) = delete;
171 RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
173 RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
174 : DataBuffer(std::move(DataBuffer)) { }
176 static bool hasFormat(const MemoryBuffer &DataBuffer);
177 std::error_code readHeader() override;
178 std::error_code readNextRecord(InstrProfRecord &Record) override;
180 InstrProfSymtab &getSymtab() override {
181 assert(Symtab.get());
182 return *Symtab.get();
186 void createSymtab(InstrProfSymtab &Symtab);
187 std::error_code readNextHeader(const char *CurrentPos);
188 std::error_code readHeader(const RawInstrProf::Header &Header);
189 template <class IntT> IntT swap(IntT Int) const {
190 return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
192 support::endianness getDataEndianness() const {
193 support::endianness HostEndian = getHostEndianness();
194 if (!ShouldSwapBytes)
196 if (HostEndian == support::little)
199 return support::little;
202 inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
203 return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
205 std::error_code readName(InstrProfRecord &Record);
206 std::error_code readFuncHash(InstrProfRecord &Record);
207 std::error_code readRawCounts(InstrProfRecord &Record);
208 std::error_code readValueProfilingData(InstrProfRecord &Record);
209 bool atEnd() const { return Data == DataEnd; }
212 ValueDataStart += CurValueDataSize;
215 const uint64_t *getCounter(IntPtrT CounterPtr) const {
216 ptrdiff_t Offset = (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
217 return CountersStart + Offset;
219 const char *getName(IntPtrT NamePtr) const {
220 ptrdiff_t Offset = (swap(NamePtr) - NamesDelta) / sizeof(char);
221 return NamesStart + Offset;
225 typedef RawInstrProfReader<uint32_t> RawInstrProfReader32;
226 typedef RawInstrProfReader<uint64_t> RawInstrProfReader64;
228 namespace IndexedInstrProf {
229 enum class HashT : uint32_t;
232 /// Trait for lookups into the on-disk hash table for the binary instrprof
234 class InstrProfLookupTrait {
235 std::vector<InstrProfRecord> DataBuffer;
236 IndexedInstrProf::HashT HashType;
237 unsigned FormatVersion;
238 // Endianness of the input value profile data.
239 // It should be LE by default, but can be changed
240 // for testing purpose.
241 support::endianness ValueProfDataEndianness;
244 InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
245 : HashType(HashType), FormatVersion(FormatVersion),
246 ValueProfDataEndianness(support::little) {}
248 typedef ArrayRef<InstrProfRecord> data_type;
250 typedef StringRef internal_key_type;
251 typedef StringRef external_key_type;
252 typedef uint64_t hash_value_type;
253 typedef uint64_t offset_type;
255 static bool EqualKey(StringRef A, StringRef B) { return A == B; }
256 static StringRef GetInternalKey(StringRef K) { return K; }
257 static StringRef GetExternalKey(StringRef K) { return K; }
259 hash_value_type ComputeHash(StringRef K);
261 static std::pair<offset_type, offset_type>
262 ReadKeyDataLength(const unsigned char *&D) {
263 using namespace support;
264 offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
265 offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
266 return std::make_pair(KeyLen, DataLen);
269 StringRef ReadKey(const unsigned char *D, offset_type N) {
270 return StringRef((const char *)D, N);
273 bool readValueProfilingData(const unsigned char *&D,
274 const unsigned char *const End);
275 data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
277 // Used for testing purpose only.
278 void setValueProfDataEndianness(support::endianness Endianness) {
279 ValueProfDataEndianness = Endianness;
283 struct InstrProfReaderIndexBase {
284 // Read all the profile records with the same key pointed to the current
286 virtual std::error_code getRecords(ArrayRef<InstrProfRecord> &Data) = 0;
287 // Read all the profile records with the key equal to FuncName
288 virtual std::error_code getRecords(StringRef FuncName,
289 ArrayRef<InstrProfRecord> &Data) = 0;
290 virtual void advanceToNextKey() = 0;
291 virtual bool atEnd() const = 0;
292 virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
293 virtual ~InstrProfReaderIndexBase() {}
294 virtual uint64_t getVersion() const = 0;
295 virtual void populateSymtab(InstrProfSymtab &) = 0;
298 typedef OnDiskIterableChainedHashTable<InstrProfLookupTrait>
299 OnDiskHashTableImplV3;
301 template <typename HashTableImpl>
302 class InstrProfReaderIndex : public InstrProfReaderIndexBase {
305 std::unique_ptr<HashTableImpl> HashTable;
306 typename HashTableImpl::data_iterator RecordIterator;
307 uint64_t FormatVersion;
310 InstrProfReaderIndex(const unsigned char *Buckets,
311 const unsigned char *const Payload,
312 const unsigned char *const Base,
313 IndexedInstrProf::HashT HashType, uint64_t Version);
315 std::error_code getRecords(ArrayRef<InstrProfRecord> &Data) override;
316 std::error_code getRecords(StringRef FuncName,
317 ArrayRef<InstrProfRecord> &Data) override;
318 void advanceToNextKey() override { RecordIterator++; }
319 bool atEnd() const override {
320 return RecordIterator == HashTable->data_end();
322 void setValueProfDataEndianness(support::endianness Endianness) override {
323 HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
325 ~InstrProfReaderIndex() override {}
326 uint64_t getVersion() const override { return FormatVersion; }
327 void populateSymtab(InstrProfSymtab &Symtab) override {
328 Symtab.create(HashTable->keys());
332 /// Reader for the indexed binary instrprof format.
333 class IndexedInstrProfReader : public InstrProfReader {
335 /// The profile data file contents.
336 std::unique_ptr<MemoryBuffer> DataBuffer;
337 /// The index into the profile data.
338 std::unique_ptr<InstrProfReaderIndexBase> Index;
339 /// The maximal execution count among all functions.
340 uint64_t MaxFunctionCount;
342 IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
343 IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
346 uint64_t getVersion() const { return Index->getVersion(); }
347 IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
348 : DataBuffer(std::move(DataBuffer)), Index(nullptr) {}
350 /// Return true if the given buffer is in an indexed instrprof format.
351 static bool hasFormat(const MemoryBuffer &DataBuffer);
353 /// Read the file header.
354 std::error_code readHeader() override;
355 /// Read a single record.
356 std::error_code readNextRecord(InstrProfRecord &Record) override;
358 /// Return the pointer to InstrProfRecord associated with FuncName
360 ErrorOr<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
363 /// Fill Counts with the profile data for the given function name.
364 std::error_code getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
365 std::vector<uint64_t> &Counts);
367 /// Return the maximum of all known function counts.
368 uint64_t getMaximumFunctionCount() { return MaxFunctionCount; }
370 /// Factory method to create an indexed reader.
371 static ErrorOr<std::unique_ptr<IndexedInstrProfReader>>
372 create(std::string Path);
374 static ErrorOr<std::unique_ptr<IndexedInstrProfReader>>
375 create(std::unique_ptr<MemoryBuffer> Buffer);
377 // Used for testing purpose only.
378 void setValueProfDataEndianness(support::endianness Endianness) {
379 Index->setValueProfDataEndianness(Endianness);
382 // See description in the base class. This interface is designed
383 // to be used by llvm-profdata (for dumping). Avoid using this when
384 // the client is the compiler.
385 InstrProfSymtab &getSymtab() override;
388 } // end namespace llvm