1 //=-- InstrProfReader.h - Instrumented profiling readers ----------*- C++ -*-=//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file contains support for reading profiling data for instrumentation
11 // based PGO and coverage.
13 //===----------------------------------------------------------------------===//
15 #ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
16 #define LLVM_PROFILEDATA_INSTRPROFREADER_H
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ProfileData/InstrProf.h"
21 #include "llvm/Support/EndianStream.h"
22 #include "llvm/Support/ErrorOr.h"
23 #include "llvm/Support/raw_ostream.h"
24 #include "llvm/Support/LineIterator.h"
25 #include "llvm/Support/MemoryBuffer.h"
26 #include "llvm/Support/OnDiskHashTable.h"
32 class InstrProfReader;
34 /// A file format agnostic iterator over profiling data.
35 class InstrProfIterator : public std::iterator<std::input_iterator_tag,
37 InstrProfReader *Reader;
38 InstrProfRecord Record;
42 InstrProfIterator() : Reader(nullptr) {}
43 InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
45 InstrProfIterator &operator++() { Increment(); return *this; }
46 bool operator==(const InstrProfIterator &RHS) { return Reader == RHS.Reader; }
47 bool operator!=(const InstrProfIterator &RHS) { return Reader != RHS.Reader; }
48 InstrProfRecord &operator*() { return Record; }
49 InstrProfRecord *operator->() { return &Record; }
52 /// Base class and interface for reading profiling data of any known instrprof
53 /// format. Provides an iterator over InstrProfRecords.
54 class InstrProfReader {
55 std::error_code LastError;
58 InstrProfReader() : LastError(instrprof_error::success) {}
59 virtual ~InstrProfReader() {}
61 /// Read the header. Required before reading first record.
62 virtual std::error_code readHeader() = 0;
63 /// Read a single record.
64 virtual std::error_code readNextRecord(InstrProfRecord &Record) = 0;
65 /// Iterator over profile data.
66 InstrProfIterator begin() { return InstrProfIterator(this); }
67 InstrProfIterator end() { return InstrProfIterator(); }
70 /// Set the current std::error_code and return same.
71 std::error_code error(std::error_code EC) {
76 /// Clear the current error code and return a successful one.
77 std::error_code success() { return error(instrprof_error::success); }
80 /// Return true if the reader has finished reading the profile data.
81 bool isEOF() { return LastError == instrprof_error::eof; }
82 /// Return true if the reader encountered an error reading profiling data.
83 bool hasError() { return LastError && !isEOF(); }
84 /// Get the current error code.
85 std::error_code getError() { return LastError; }
87 /// Factory method to create an appropriately typed reader for the given
89 static ErrorOr<std::unique_ptr<InstrProfReader>> create(std::string Path);
91 static ErrorOr<std::unique_ptr<InstrProfReader>>
92 create(std::unique_ptr<MemoryBuffer> Buffer);
95 /// Reader for the simple text based instrprof format.
97 /// This format is a simple text format that's suitable for test data. Records
98 /// are separated by one or more blank lines, and record fields are separated by
101 /// Each record consists of a function name, a function hash, a number of
102 /// counters, and then each counter value, in that order.
103 class TextInstrProfReader : public InstrProfReader {
105 /// The profile data file contents.
106 std::unique_ptr<MemoryBuffer> DataBuffer;
107 /// Iterator over the profile data.
110 TextInstrProfReader(const TextInstrProfReader &) = delete;
111 TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
113 TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
114 : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
116 /// Return true if the given buffer is in text instrprof format.
117 static bool hasFormat(const MemoryBuffer &Buffer);
120 std::error_code readHeader() override { return success(); }
121 /// Read a single record.
122 std::error_code readNextRecord(InstrProfRecord &Record) override;
125 /// Reader for the raw instrprof binary format from runtime.
127 /// This format is a raw memory dump of the instrumentation-based profiling data
128 /// from the runtime. It has no index.
130 /// Templated on the unsigned type whose size matches pointers on the platform
131 /// that wrote the profile.
132 template <class IntPtrT>
133 class RawInstrProfReader : public InstrProfReader {
135 /// The profile data file contents.
136 std::unique_ptr<MemoryBuffer> DataBuffer;
137 bool ShouldSwapBytes;
138 uint64_t CountersDelta;
140 uint64_t ValueDataDelta;
141 const RawInstrProf::ProfileData<IntPtrT> *Data;
142 const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
143 const uint64_t *CountersStart;
144 const char *NamesStart;
145 const uint8_t *ValueDataStart;
146 const char *ProfileEnd;
147 uint32_t ValueKindLast;
149 // String table for holding a unique copy of all the strings in the profile.
150 InstrProfStringTable StringTable;
151 InstrProfRecord::ValueMapType FunctionPtrToNameMap;
153 RawInstrProfReader(const RawInstrProfReader &) = delete;
154 RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
156 RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
157 : DataBuffer(std::move(DataBuffer)) { }
159 static bool hasFormat(const MemoryBuffer &DataBuffer);
160 std::error_code readHeader() override;
161 std::error_code readNextRecord(InstrProfRecord &Record) override;
164 std::error_code readNextHeader(const char *CurrentPos);
165 std::error_code readHeader(const RawInstrProf::Header &Header);
166 template <class IntT>
167 IntT swap(IntT Int) const {
168 return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
/// Number of bytes that must be appended to \p SizeInBytes to reach the next
/// multiple of sizeof(uint64_t); 0 when it already is a multiple.
///
/// Static because the result depends only on the argument, never on reader
/// state.
static inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
  // The "7 &" mask folds the "already aligned" case (8 - 0 == 8) to 0.
  return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
}
173 std::error_code readName(InstrProfRecord &Record);
174 std::error_code readFuncHash(InstrProfRecord &Record);
175 std::error_code readRawCounts(InstrProfRecord &Record);
176 std::error_code readValueData(InstrProfRecord &Record);
177 bool atEnd() const { return Data == DataEnd; }
178 void advanceData() { Data++; }
180 const uint64_t *getCounter(IntPtrT CounterPtr) const {
181 ptrdiff_t Offset = (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
182 return CountersStart + Offset;
184 const char *getName(IntPtrT NamePtr) const {
185 ptrdiff_t Offset = (swap(NamePtr) - NamesDelta) / sizeof(char);
186 return NamesStart + Offset;
188 const uint8_t *getValueDataCounts(IntPtrT ValueCountsPtr) const {
189 ptrdiff_t Offset = (swap(ValueCountsPtr) - ValueDataDelta) / sizeof(uint8_t);
190 return ValueDataStart + Offset;
192 // This accepts an already byte-swapped ValueDataPtr argument.
193 const InstrProfValueData *getValueData(IntPtrT ValueDataPtr) const {
194 ptrdiff_t Offset = (ValueDataPtr - ValueDataDelta) / sizeof(uint8_t);
195 return reinterpret_cast<const InstrProfValueData*>(ValueDataStart + Offset);
199 typedef RawInstrProfReader<uint32_t> RawInstrProfReader32;
200 typedef RawInstrProfReader<uint64_t> RawInstrProfReader64;
namespace IndexedInstrProf {
/// Opaque declaration of the hash-type enumeration; only the underlying type
/// is fixed here, the enumerators are declared elsewhere.
enum class HashT : uint32_t;
} // end namespace IndexedInstrProf
206 /// Trait for lookups into the on-disk hash table for the binary instrprof
208 class InstrProfLookupTrait {
209 std::vector<InstrProfRecord> DataBuffer;
210 IndexedInstrProf::HashT HashType;
211 unsigned FormatVersion;
212 // Endianness of the input value profile data.
213 // It should be LE by default, but can be changed
214 // for testing purpose.
215 support::endianness ValueProfDataEndianness;
216 std::vector<std::pair<uint64_t, const char *>> HashKeys;
219 InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
220 : HashType(HashType), FormatVersion(FormatVersion),
221 ValueProfDataEndianness(support::little) {}
223 typedef ArrayRef<InstrProfRecord> data_type;
225 typedef StringRef internal_key_type;
226 typedef StringRef external_key_type;
227 typedef uint64_t hash_value_type;
228 typedef uint64_t offset_type;
230 static bool EqualKey(StringRef A, StringRef B) { return A == B; }
231 static StringRef GetInternalKey(StringRef K) { return K; }
232 static StringRef GetExternalKey(StringRef K) { return K; }
234 hash_value_type ComputeHash(StringRef K);
236 void setHashKeys(std::vector<std::pair<uint64_t, const char *>> HashKeys) {
237 this->HashKeys = std::move(HashKeys);
239 static std::pair<offset_type, offset_type>
240 ReadKeyDataLength(const unsigned char *&D) {
241 using namespace support;
242 offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
243 offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
244 return std::make_pair(KeyLen, DataLen);
247 StringRef ReadKey(const unsigned char *D, offset_type N) {
248 return StringRef((const char *)D, N);
251 bool ReadValueProfilingData(const unsigned char *&D,
252 const unsigned char *const End);
253 data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
255 // Used for testing purpose only.
256 void setValueProfDataEndianness(support::endianness Endianness) {
257 ValueProfDataEndianness = Endianness;
261 class InstrProfReaderIndex {
263 typedef OnDiskIterableChainedHashTable<InstrProfLookupTrait> IndexType;
265 std::unique_ptr<IndexType> Index;
266 IndexType::data_iterator RecordIterator;
267 uint64_t FormatVersion;
269 // String table for holding a unique copy of all the strings in the profile.
270 InstrProfStringTable StringTable;
273 InstrProfReaderIndex() : Index(nullptr) {}
274 void Init(const unsigned char *Buckets, const unsigned char *const Payload,
275 const unsigned char *const Base, IndexedInstrProf::HashT HashType,
278 // Read all the pofile records with the same key pointed to the current
280 std::error_code getRecords(ArrayRef<InstrProfRecord> &Data);
281 // Read all the profile records with the key equal to FuncName
282 std::error_code getRecords(StringRef FuncName,
283 ArrayRef<InstrProfRecord> &Data);
285 void advanceToNextKey() { RecordIterator++; }
286 bool atEnd() const { return RecordIterator == Index->data_end(); }
287 // Used for testing purpose only.
288 void setValueProfDataEndianness(support::endianness Endianness) {
289 Index->getInfoObj().setValueProfDataEndianness(Endianness);
293 /// Reader for the indexed binary instrprof format.
294 class IndexedInstrProfReader : public InstrProfReader {
296 /// The profile data file contents.
297 std::unique_ptr<MemoryBuffer> DataBuffer;
298 /// The index into the profile data.
299 InstrProfReaderIndex Index;
300 /// The maximal execution count among all functions.
301 uint64_t MaxFunctionCount;
303 IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
304 IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
307 IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
308 : DataBuffer(std::move(DataBuffer)), Index() {}
310 /// Return true if the given buffer is in an indexed instrprof format.
311 static bool hasFormat(const MemoryBuffer &DataBuffer);
313 /// Read the file header.
314 std::error_code readHeader() override;
315 /// Read a single record.
316 std::error_code readNextRecord(InstrProfRecord &Record) override;
318 /// Return the pointer to InstrProfRecord associated with FuncName
320 ErrorOr<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
323 /// Fill Counts with the profile data for the given function name.
324 std::error_code getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
325 std::vector<uint64_t> &Counts);
327 /// Return the maximum of all known function counts.
328 uint64_t getMaximumFunctionCount() { return MaxFunctionCount; }
330 /// Factory method to create an indexed reader.
331 static ErrorOr<std::unique_ptr<IndexedInstrProfReader>>
332 create(std::string Path);
334 static ErrorOr<std::unique_ptr<IndexedInstrProfReader>>
335 create(std::unique_ptr<MemoryBuffer> Buffer);
337 // Used for testing purpose only.
338 void setValueProfDataEndianness(support::endianness Endianness) {
339 Index.setValueProfDataEndianness(Endianness);
343 } // end namespace llvm